/******************************************************************************
 * @file     csky_vdsp2_math.h
 * @brief    Public header file for CSI DSP Library.
 * @version  V1.0
 * @date     20. Dec 2016
 ******************************************************************************/
/* ---------------------------------------------------------------------------
 * Copyright (C) 2016 CSKY Limited. All rights reserved.
 *
 * Redistribution and use of this software in source and binary forms,
 * with or without modification, are permitted provided that the following
 * conditions are met:
 *   * Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright notice,
 *     this list of conditions and the following disclaimer in the documentation
 *     and/or other materials provided with the distribution.
 *   * Neither the name of CSKY Ltd. nor the names of CSKY's contributors may
 *     be used to endorse or promote products derived from this software without
 *     specific prior written permission of CSKY Ltd.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
 * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 * -------------------------------------------------------------------------- */

#ifndef _CSKY_VDSP2_MATH_H
#define _CSKY_VDSP2_MATH_H

#include <stdint.h>
#include <string.h>

#ifdef CSKY_VDSP2_MATH_DSP
#include "csi_core.h"
#endif

#ifdef   __cplusplus
extern "C"
{
#endif
  /** Signed 8-bit fractional value; fixed-point layout 1.7. */
  typedef int8_t    q7_t;

  /** Signed 16-bit fractional value; fixed-point layout 1.15. */
  typedef int16_t   q15_t;

  /** Signed 32-bit fractional value; fixed-point layout 1.31. */
  typedef int32_t   q31_t;

  /** Signed 64-bit fractional value; fixed-point layout 1.63. */
  typedef int64_t   q63_t;

  /** Floating-point sample type: an alias of float, named for a 32-bit float. */
  typedef float     float32_t;

  /**
   * @brief Status codes returned by those library functions that report errors.
   *
   * Success is zero; every error code is negative.
   */
  typedef enum
  {
    /** No error. */
    CSKY_VDSP2_MATH_SUCCESS        =  0,
    /** At least one argument is incorrect. */
    CSKY_VDSP2_MATH_ARGUMENT_ERROR = -1,
    /** The data buffer length is incorrect. */
    CSKY_VDSP2_MATH_LENGTH_ERROR   = -2,
    /** The matrix sizes are not compatible with the operation. */
    CSKY_VDSP2_MATH_SIZE_MISMATCH  = -3,
    /** A not-a-number (NaN) or an infinity was generated. */
    CSKY_VDSP2_MATH_NANINF         = -4,
    /** Matrix inversion: the input matrix is singular and cannot be inverted. */
    CSKY_VDSP2_MATH_SINGULAR       = -5,
    /** A test failed. */
    CSKY_VDSP2_MATH_TEST_FAILURE   = -6
  } csky_vdsp2_status;

  /**
   * @brief Per-filter bookkeeping for a Q7 FIR filter.
   */
  typedef struct
  {
    uint16_t  numTaps;   /**< Count of filter coefficients. */
    q7_t     *pState;    /**< State variable array; its length is numTaps+blockSize-1. */
    q7_t     *pCoeffs;   /**< Coefficient array; its length is numTaps. */
  } csky_vdsp2_fir_instance_q7;

  /**
   * @brief Per-filter bookkeeping for a Q15 FIR filter.
   */
  typedef struct
  {
    uint16_t  numTaps;   /**< Count of filter coefficients. */
    q15_t    *pState;    /**< State variable array; its length is numTaps+blockSize-1. */
    q15_t    *pCoeffs;   /**< Coefficient array; its length is numTaps. */
  } csky_vdsp2_fir_instance_q15;

  /**
   * @brief Per-filter bookkeeping for a Q31 FIR filter.
   */
  typedef struct
  {
    uint16_t  numTaps;   /**< Count of filter coefficients. */
    q31_t    *pState;    /**< State variable array; its length is numTaps+blockSize-1. */
    q31_t    *pCoeffs;   /**< Coefficient array; its length is numTaps. */
  } csky_vdsp2_fir_instance_q31;

  /**
   * @brief Per-filter bookkeeping for a floating-point FIR filter.
   */
  typedef struct
  {
    uint16_t   numTaps;   /**< Count of filter coefficients. */
    float32_t *pState;    /**< State variable array; its length is numTaps+blockSize-1. */
    float32_t *pCoeffs;   /**< Coefficient array; its length is numTaps. */
  } csky_vdsp2_fir_instance_f32;

  /**
   * @brief Q7 FIR filter routine; takes the filter instance read-only.
   * @note  Declaration only. pSrc and pDst are presumably the input and output
   *        blocks of blockSize samples -- confirm against the implementation.
   */
  void csky_vdsp2_fir_q7(
      const csky_vdsp2_fir_instance_q7 *S,
      q7_t *pSrc,
      q7_t *pDst,
      uint32_t blockSize);

  /**
   * @brief Initialisation routine for a Q7 FIR filter instance.
   * @note  Declaration only. numTaps, pCoeffs and pState share their names with
   *        the instance fields and are presumably stored into S -- confirm.
   */
  void csky_vdsp2_fir_init_q7(
      csky_vdsp2_fir_instance_q7 *S,
      uint16_t numTaps,
      q7_t *pCoeffs,
      q7_t *pState,
      uint32_t blockSize);

  /**
   * @brief Q15 FIR filter routine; takes the filter instance read-only.
   * @note  Declaration only. pSrc and pDst are presumably the input and output
   *        blocks of blockSize samples -- confirm against the implementation.
   */
  void csky_vdsp2_fir_q15(
      const csky_vdsp2_fir_instance_q15 *S,
      q15_t *pSrc,
      q15_t *pDst,
      uint32_t blockSize);

  /**
   * @brief "Fast" variant of the Q15 FIR filter routine (same signature).
   * @note  What the fast variant trades off is not visible from this header.
   */
  void csky_vdsp2_fir_fast_q15(
      const csky_vdsp2_fir_instance_q15 *S,
      q15_t *pSrc,
      q15_t *pDst,
      uint32_t blockSize);

  /**
   * @brief  Initialisation routine for a Q15 FIR filter instance.
   * @return A csky_vdsp2_status code; unlike the Q7, Q31 and f32 initialisers,
   *         this one reports a status. The failure conditions are not visible
   *         from this header.
   */
  csky_vdsp2_status csky_vdsp2_fir_init_q15(
      csky_vdsp2_fir_instance_q15 *S,
      uint16_t numTaps,
      q15_t *pCoeffs,
      q15_t *pState,
      uint32_t blockSize);

  /**
   * @brief Q31 FIR filter routine; takes the filter instance read-only.
   * @note  Declaration only. pSrc and pDst are presumably the input and output
   *        blocks of blockSize samples -- confirm against the implementation.
   */
  void csky_vdsp2_fir_q31(
      const csky_vdsp2_fir_instance_q31 *S,
      q31_t *pSrc,
      q31_t *pDst,
      uint32_t blockSize);

  /**
   * @brief "Fast" variant of the Q31 FIR filter routine (same signature).
   * @note  What the fast variant trades off is not visible from this header.
   */
  void csky_vdsp2_fir_fast_q31(
      const csky_vdsp2_fir_instance_q31 *S,
      q31_t *pSrc,
      q31_t *pDst,
      uint32_t blockSize);

  /**
   * @brief Initialisation routine for a Q31 FIR filter instance.
   * @note  Declaration only. numTaps, pCoeffs and pState share their names with
   *        the instance fields and are presumably stored into S -- confirm.
   */
  void csky_vdsp2_fir_init_q31(
      csky_vdsp2_fir_instance_q31 *S,
      uint16_t numTaps,
      q31_t *pCoeffs,
      q31_t *pState,
      uint32_t blockSize);

  /**
   * @brief Floating-point FIR filter routine; takes the filter instance read-only.
   * @note  Declaration only. pSrc and pDst are presumably the input and output
   *        blocks of blockSize samples -- confirm against the implementation.
   */
  void csky_vdsp2_fir_f32(
      const csky_vdsp2_fir_instance_f32 *S,
      float32_t *pSrc,
      float32_t *pDst,
      uint32_t blockSize);

  /**
   * @brief Initialisation routine for a floating-point FIR filter instance.
   * @note  Declaration only. numTaps, pCoeffs and pState share their names with
   *        the instance fields and are presumably stored into S -- confirm.
   */
  void csky_vdsp2_fir_init_f32(
      csky_vdsp2_fir_instance_f32 *S,
      uint16_t numTaps,
      float32_t *pCoeffs,
      float32_t *pState,
      uint32_t blockSize);


  /**
   * @brief Per-filter bookkeeping for a Q15 Biquad cascade filter.
   */
  typedef struct
  {
    int8_t  numStages;   /**< Count of 2nd order stages; the overall order is 2*numStages. */
    q15_t  *pState;      /**< State coefficient array; its length is 4*numStages. */
    q15_t  *pCoeffs;     /**< Coefficient array; its length is 5*numStages. */
    int8_t  postShift;   /**< Extra shift, in bits, applied to every output sample. */
  } csky_vdsp2_biquad_casd_df1_inst_q15;

  /**
   * @brief Per-filter bookkeeping for a Q31 Biquad cascade filter.
   */
  typedef struct
  {
    uint32_t  numStages;   /**< Count of 2nd order stages; the overall order is 2*numStages. */
    q31_t    *pState;      /**< State coefficient array; its length is 4*numStages. */
    q31_t    *pCoeffs;     /**< Coefficient array; its length is 5*numStages. */
    uint8_t   postShift;   /**< Extra shift, in bits, applied to every output sample. */
  } csky_vdsp2_biquad_casd_df1_inst_q31;

 /**
   * @brief Instance structure for the Q31 Biquad cascade filter.
   */

  /**
   * @brief Instance structure for the floating-point Biquad cascade filter.
   */
  typedef struct
  {
    uint32_t numStages;      /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
    float32_t *pState;       /**< Points to the array of state coefficients.  The array is of length 4*numStages. */
    float32_t *pCoeffs;      /**< Points to the array of coefficients.  The array is of length 5*numStages. */
  } csky_vdsp2_biquad_casd_df1_inst_f32;

  /**
   * @brief Q15 Biquad cascade (df1) filter routine; takes the instance read-only.
   * @note  Declaration only. pSrc and pDst are presumably the input and output
   *        blocks of blockSize samples -- confirm against the implementation.
   */
  void csky_vdsp2_biquad_cascade_df1_q15(
      const csky_vdsp2_biquad_casd_df1_inst_q15 *S,
      q15_t *pSrc,
      q15_t *pDst,
      uint32_t blockSize);

  /**
   * @brief Initialisation routine for a Q15 Biquad cascade instance.
   * @note  NOTE(review): numStages is uint8_t here, whereas the instance field
   *        of the same name is int8_t -- confirm the intended value range.
   */
  void csky_vdsp2_biquad_cascade_df1_init_q15(
      csky_vdsp2_biquad_casd_df1_inst_q15 *S,
      uint8_t numStages,
      q15_t *pCoeffs,
      q15_t *pState,
      int8_t postShift);

  /**
   * @brief "Fast" variant of the Q15 Biquad cascade routine (same signature).
   * @note  What the fast variant trades off is not visible from this header.
   */
  void csky_vdsp2_biquad_cascade_df1_fast_q15(
      const csky_vdsp2_biquad_casd_df1_inst_q15 *S,
      q15_t *pSrc,
      q15_t *pDst,
      uint32_t blockSize);

  /**
   * @brief Q31 Biquad cascade (df1) filter routine; takes the instance read-only.
   * @note  Declaration only. pSrc and pDst are presumably the input and output
   *        blocks of blockSize samples -- confirm against the implementation.
   */
  void csky_vdsp2_biquad_cascade_df1_q31(
      const csky_vdsp2_biquad_casd_df1_inst_q31 *S,
      q31_t *pSrc,
      q31_t *pDst,
      uint32_t blockSize);

  /**
   * @brief "Fast" variant of the Q31 Biquad cascade routine (same signature).
   * @note  What the fast variant trades off is not visible from this header.
   */
  void csky_vdsp2_biquad_cascade_df1_fast_q31(
      const csky_vdsp2_biquad_casd_df1_inst_q31 *S,
      q31_t *pSrc,
      q31_t *pDst,
      uint32_t blockSize);

  /**
   * @brief Initialisation routine for a Q31 Biquad cascade instance.
   * @note  NOTE(review): numStages is uint8_t and postShift is int8_t here,
   *        whereas the instance fields of the same names are uint32_t and
   *        uint8_t -- confirm the intended value ranges.
   */
  void csky_vdsp2_biquad_cascade_df1_init_q31(
      csky_vdsp2_biquad_casd_df1_inst_q31 *S,
      uint8_t numStages,
      q31_t *pCoeffs,
      q31_t *pState,
      int8_t postShift);

  /**
   * @brief Floating-point Biquad cascade (df1) filter routine; instance is read-only.
   * @note  Declaration only. pSrc and pDst are presumably the input and output
   *        blocks of blockSize samples -- confirm against the implementation.
   */
  void csky_vdsp2_biquad_cascade_df1_f32(
      const csky_vdsp2_biquad_casd_df1_inst_f32 *S,
      float32_t *pSrc,
      float32_t *pDst,
      uint32_t blockSize);

  /**
   * @brief Initialisation routine for a floating-point Biquad cascade instance.
   * @note  Takes no postShift argument; the floating-point instance has no such
   *        field. numStages is uint8_t here but uint32_t in the instance.
   */
  void csky_vdsp2_biquad_cascade_df1_init_f32(
      csky_vdsp2_biquad_casd_df1_inst_f32 *S,
      uint8_t numStages,
      float32_t *pCoeffs,
      float32_t *pState);


  /**
   * @brief Descriptor of a floating-point matrix.
   */
  typedef struct
  {
    uint16_t   numRows;   /**< Row count of the matrix. */
    uint16_t   numCols;   /**< Column count of the matrix. */
    float32_t *pData;     /**< Matrix data. */
  } csky_vdsp2_matrix_instance_f32;

  /**
   * @brief Descriptor of a Q15 matrix.
   */
  typedef struct
  {
    uint16_t  numRows;   /**< Row count of the matrix. */
    uint16_t  numCols;   /**< Column count of the matrix. */
    q15_t    *pData;     /**< Matrix data. */
  } csky_vdsp2_matrix_instance_q15;

  /**
   * @brief Descriptor of a Q31 matrix.
   */
  typedef struct
  {
    uint16_t  numRows;   /**< Row count of the matrix. */
    uint16_t  numCols;   /**< Column count of the matrix. */
    q31_t    *pData;     /**< Matrix data. */
  } csky_vdsp2_matrix_instance_q31;

  /*
   * Matrix addition, one declaration per element type.  Only the signatures
   * are visible here: pSrcA/pSrcB are read-only and are presumably the
   * operands, pDst presumably the result -- confirm with the implementation.
   * Each routine returns a csky_vdsp2_status code.
   */

  /** @brief Floating-point matrix addition. */
  csky_vdsp2_status csky_vdsp2_mat_add_f32(
      const csky_vdsp2_matrix_instance_f32 *pSrcA,
      const csky_vdsp2_matrix_instance_f32 *pSrcB,
      csky_vdsp2_matrix_instance_f32 *pDst);

  /** @brief Q15 matrix addition. */
  csky_vdsp2_status csky_vdsp2_mat_add_q15(
      const csky_vdsp2_matrix_instance_q15 *pSrcA,
      const csky_vdsp2_matrix_instance_q15 *pSrcB,
      csky_vdsp2_matrix_instance_q15 *pDst);

  /** @brief Q31 matrix addition. */
  csky_vdsp2_status csky_vdsp2_mat_add_q31(
      const csky_vdsp2_matrix_instance_q31 *pSrcA,
      const csky_vdsp2_matrix_instance_q31 *pSrcB,
      csky_vdsp2_matrix_instance_q31 *pDst);

  /*
   * "cmplx_mult" matrix routines (presumably complex matrix multiplication),
   * one declaration per element type.  How complex values are laid out in
   * pData is not visible from this header.  pSrcA/pSrcB are read-only; each
   * routine returns a csky_vdsp2_status code.
   */

  /** @brief Floating-point cmplx_mult matrix routine. */
  csky_vdsp2_status csky_vdsp2_mat_cmplx_mult_f32(
      const csky_vdsp2_matrix_instance_f32 *pSrcA,
      const csky_vdsp2_matrix_instance_f32 *pSrcB,
      csky_vdsp2_matrix_instance_f32 *pDst);

  /** @brief Q15 cmplx_mult matrix routine. */
  csky_vdsp2_status csky_vdsp2_mat_cmplx_mult_q15(
      const csky_vdsp2_matrix_instance_q15 *pSrcA,
      const csky_vdsp2_matrix_instance_q15 *pSrcB,
      csky_vdsp2_matrix_instance_q15 *pDst);

  /** @brief Q31 cmplx_mult matrix routine. */
  csky_vdsp2_status csky_vdsp2_mat_cmplx_mult_q31(
      const csky_vdsp2_matrix_instance_q31 *pSrcA,
      const csky_vdsp2_matrix_instance_q31 *pSrcB,
      csky_vdsp2_matrix_instance_q31 *pDst);

  /*
   * "trans" matrix routines (presumably matrix transpose), one declaration per
   * element type.  pSrc is read-only and pDst is presumably the result --
   * confirm with the implementation.  Each returns a csky_vdsp2_status code.
   */

  /** @brief Floating-point trans matrix routine. */
  csky_vdsp2_status csky_vdsp2_mat_trans_f32(
      const csky_vdsp2_matrix_instance_f32 *pSrc,
      csky_vdsp2_matrix_instance_f32 *pDst);

  /** @brief Q15 trans matrix routine. */
  csky_vdsp2_status csky_vdsp2_mat_trans_q15(
      const csky_vdsp2_matrix_instance_q15 *pSrc,
      csky_vdsp2_matrix_instance_q15 *pDst);

  /** @brief Q31 trans matrix routine. */
  csky_vdsp2_status csky_vdsp2_mat_trans_q31(
      const csky_vdsp2_matrix_instance_q31 *pSrc,
      csky_vdsp2_matrix_instance_q31 *pDst);

  /*
   * Matrix multiplication routines in a plain and a "_trans" flavour, one pair
   * per element type.  pSrcA/pSrcB are read-only and presumably the operands,
   * pDst presumably the product.  What the "_trans" suffix changes (e.g. which
   * operand is treated as transposed) is not visible from this header --
   * confirm with the implementation.  Each returns a csky_vdsp2_status code.
   */

  /** @brief Floating-point matrix multiplication. */
  csky_vdsp2_status csky_vdsp2_mat_mult_f32(
      const csky_vdsp2_matrix_instance_f32 *pSrcA,
      const csky_vdsp2_matrix_instance_f32 *pSrcB,
      csky_vdsp2_matrix_instance_f32 *pDst);

  /** @brief Floating-point matrix multiplication, "_trans" flavour. */
  csky_vdsp2_status csky_vdsp2_mat_mult_trans_f32(
      const csky_vdsp2_matrix_instance_f32 *pSrcA,
      const csky_vdsp2_matrix_instance_f32 *pSrcB,
      csky_vdsp2_matrix_instance_f32 *pDst);

  /** @brief Q15 matrix multiplication. */
  csky_vdsp2_status csky_vdsp2_mat_mult_q15(
      const csky_vdsp2_matrix_instance_q15 *pSrcA,
      const csky_vdsp2_matrix_instance_q15 *pSrcB,
      csky_vdsp2_matrix_instance_q15 *pDst);

  /** @brief Q15 matrix multiplication, "_trans" flavour. */
  csky_vdsp2_status csky_vdsp2_mat_mult_trans_q15(
      const csky_vdsp2_matrix_instance_q15 *pSrcA,
      const csky_vdsp2_matrix_instance_q15 *pSrcB,
      csky_vdsp2_matrix_instance_q15 *pDst);

  /** @brief Q31 matrix multiplication. */
  csky_vdsp2_status csky_vdsp2_mat_mult_q31(
      const csky_vdsp2_matrix_instance_q31 *pSrcA,
      const csky_vdsp2_matrix_instance_q31 *pSrcB,
      csky_vdsp2_matrix_instance_q31 *pDst);

  /** @brief Q31 matrix multiplication, "_trans" flavour. */
  csky_vdsp2_status csky_vdsp2_mat_mult_trans_q31(
      const csky_vdsp2_matrix_instance_q31 *pSrcA,
      const csky_vdsp2_matrix_instance_q31 *pSrcB,
      csky_vdsp2_matrix_instance_q31 *pDst);

  /*
   * Matrix subtraction, one declaration per element type.  pSrcA/pSrcB are
   * read-only; the operand order (presumably A minus B) and the role of pDst
   * are inferred from the names -- confirm with the implementation.  Each
   * routine returns a csky_vdsp2_status code.
   */

  /** @brief Floating-point matrix subtraction. */
  csky_vdsp2_status csky_vdsp2_mat_sub_f32(
      const csky_vdsp2_matrix_instance_f32 *pSrcA,
      const csky_vdsp2_matrix_instance_f32 *pSrcB,
      csky_vdsp2_matrix_instance_f32 *pDst);

  /** @brief Q15 matrix subtraction. */
  csky_vdsp2_status csky_vdsp2_mat_sub_q15(
      const csky_vdsp2_matrix_instance_q15 *pSrcA,
      const csky_vdsp2_matrix_instance_q15 *pSrcB,
      csky_vdsp2_matrix_instance_q15 *pDst);

  /** @brief Q31 matrix subtraction. */
  csky_vdsp2_status csky_vdsp2_mat_sub_q31(
      const csky_vdsp2_matrix_instance_q31 *pSrcA,
      const csky_vdsp2_matrix_instance_q31 *pSrcB,
      csky_vdsp2_matrix_instance_q31 *pDst);

  /*
   * Matrix scaling, one declaration per element type.  The floating-point
   * routine takes a single scale value; the fixed-point routines take a
   * fractional factor plus a shift count.  How scaleFract and shift combine is
   * not visible from this header -- confirm with the implementation.  Each
   * routine returns a csky_vdsp2_status code.
   */

  /** @brief Floating-point matrix scaling. */
  csky_vdsp2_status csky_vdsp2_mat_scale_f32(
      const csky_vdsp2_matrix_instance_f32 *pSrc,
      float32_t scale,
      csky_vdsp2_matrix_instance_f32 *pDst);

  /** @brief Q15 matrix scaling. */
  csky_vdsp2_status csky_vdsp2_mat_scale_q15(
      const csky_vdsp2_matrix_instance_q15 *pSrc,
      q15_t scaleFract,
      int32_t shift,
      csky_vdsp2_matrix_instance_q15 *pDst);

  /** @brief Q31 matrix scaling. */
  csky_vdsp2_status csky_vdsp2_mat_scale_q31(
      const csky_vdsp2_matrix_instance_q31 *pSrc,
      q31_t scaleFract,
      int32_t shift,
      csky_vdsp2_matrix_instance_q31 *pDst);

  /*
   * Matrix descriptor initialisers, one declaration per element type.  The
   * arguments correspond to the descriptor fields (numRows, numCols, pData)
   * and are presumably stored into S -- confirm with the implementation.
   */

  /** @brief Initialiser for a Q31 matrix descriptor. */
  void csky_vdsp2_mat_init_q31(
      csky_vdsp2_matrix_instance_q31 *S,
      uint16_t nRows,
      uint16_t nColumns,
      q31_t *pData);

  /** @brief Initialiser for a Q15 matrix descriptor. */
  void csky_vdsp2_mat_init_q15(
      csky_vdsp2_matrix_instance_q15 *S,
      uint16_t nRows,
      uint16_t nColumns,
      q15_t *pData);

  /** @brief Initialiser for a floating-point matrix descriptor. */
  void csky_vdsp2_mat_init_f32(
      csky_vdsp2_matrix_instance_f32 *S,
      uint16_t nRows,
      uint16_t nColumns,
      float32_t *pData);

  /*
   * Vector multiplication routines.  Only the signatures are visible here;
   * pSrcA/pSrcB are presumably the operand vectors and pDst the result, each
   * of blockSize elements -- confirm with the implementation.
   */

  /**
   * @brief Mixed Q15 x Q31 multiplication with a shift value.
   * @note  There is no separate destination parameter; where the result is
   *        written, and the direction of the shift, are not visible from this
   *        header -- confirm with the implementation.
   */
  void csky_vdsp2_mult_q15xq31_sht(
      q15_t *pSrcA,
      q31_t *pSrcB,
      uint32_t shiftValue,
      uint32_t blockSize);

  /** @brief Q7 vector multiplication. */
  void csky_vdsp2_mult_q7(
      q7_t *pSrcA,
      q7_t *pSrcB,
      q7_t *pDst,
      uint32_t blockSize);

  /** @brief Q15 vector multiplication. */
  void csky_vdsp2_mult_q15(
      q15_t *pSrcA,
      q15_t *pSrcB,
      q15_t *pDst,
      uint32_t blockSize);

  /** @brief Q15 vector multiplication, "rnd" variant (presumably rounding; confirm). */
  void csky_vdsp2_mult_rnd_q15(
      q15_t *pSrcA,
      q15_t *pSrcB,
      q15_t *pDst,
      uint32_t blockSize);

  /** @brief Q31 vector multiplication. */
  void csky_vdsp2_mult_q31(
      q31_t *pSrcA,
      q31_t *pSrcB,
      q31_t *pDst,
      uint32_t blockSize);

  /** @brief Floating-point vector multiplication. */
  void csky_vdsp2_mult_f32(
      float32_t *pSrcA,
      float32_t *pSrcB,
      float32_t *pDst,
      uint32_t blockSize);


  /**
   * @brief Instance data for the Radix-2 Q15 CFFT/CIFFT function.
   */
  typedef struct
  {
    uint16_t  fftLen;             /**< FFT length. */
    uint8_t   ifftFlag;           /**< 0 selects the forward transform, 1 the inverse transform. */
    uint8_t   bitReverseFlag;     /**< 1 enables bit reversal of the output, 0 disables it. */
    q15_t    *pTwiddle;           /**< Sine twiddle factor table. */
    uint16_t *pBitRevTable;       /**< Bit reversal table. */
    uint16_t  twidCoefModifier;   /**< Twiddle coefficient modifier; lets FFTs of different sizes share one twiddle factor table. */
    uint16_t  bitRevFactor;       /**< Bit reversal modifier; lets FFTs of different sizes share one bit reversal table. */
  } csky_vdsp2_cfft_radix2_instance_q15;

  /**
   * @brief Instance data for the Radix-4 Q15 CFFT/CIFFT function.
   */
  typedef struct
  {
    uint16_t  fftLen;             /**< FFT length. */
    uint8_t   ifftFlag;           /**< 0 selects the forward transform, 1 the inverse transform. */
    uint8_t   bitReverseFlag;     /**< 1 enables bit reversal of the output, 0 disables it. */
    q15_t    *pTwiddle;           /**< Twiddle factor table. */
    uint16_t *pBitRevTable;       /**< Bit reversal table. */
    uint16_t  twidCoefModifier;   /**< Twiddle coefficient modifier; lets FFTs of different sizes share one twiddle factor table. */
    uint16_t  bitRevFactor;       /**< Bit reversal modifier; lets FFTs of different sizes share one bit reversal table. */
  } csky_vdsp2_cfft_radix4_instance_q15;

  /**
   * @brief Instance data for the Radix-2 Q31 CFFT/CIFFT function.
   */
  typedef struct
  {
    uint16_t  fftLen;             /**< FFT length. */
    uint8_t   ifftFlag;           /**< 0 selects the forward transform, 1 the inverse transform. */
    uint8_t   bitReverseFlag;     /**< 1 enables bit reversal of the output, 0 disables it. */
    q31_t    *pTwiddle;           /**< Twiddle factor table. */
    uint16_t *pBitRevTable;       /**< Bit reversal table. */
    uint16_t  twidCoefModifier;   /**< Twiddle coefficient modifier; lets FFTs of different sizes share one twiddle factor table. */
    uint16_t  bitRevFactor;       /**< Bit reversal modifier; lets FFTs of different sizes share one bit reversal table. */
  } csky_vdsp2_cfft_radix2_instance_q31;

  /**
   * @brief Instance data for the Radix-4 Q31 CFFT/CIFFT function.
   */
  typedef struct
  {
    uint16_t  fftLen;             /**< FFT length. */
    uint8_t   ifftFlag;           /**< 0 selects the forward transform, 1 the inverse transform. */
    uint8_t   bitReverseFlag;     /**< 1 enables bit reversal of the output, 0 disables it. */
    q31_t    *pTwiddle;           /**< Twiddle factor table. */
    uint16_t *pBitRevTable;       /**< Bit reversal table. */
    uint16_t  twidCoefModifier;   /**< Twiddle coefficient modifier; lets FFTs of different sizes share one twiddle factor table. */
    uint16_t  bitRevFactor;       /**< Bit reversal modifier; lets FFTs of different sizes share one bit reversal table. */
  } csky_vdsp2_cfft_radix4_instance_q31;

  /**
   * @brief Instance data for the Radix-2 floating-point CFFT/CIFFT function.
   */
  typedef struct
  {
    uint16_t   fftLen;             /**< FFT length. */
    uint8_t    ifftFlag;           /**< 0 selects the forward transform, 1 the inverse transform. */
    uint8_t    bitReverseFlag;     /**< 1 enables bit reversal of the output, 0 disables it. */
    float32_t *pTwiddle;           /**< Twiddle factor table. */
    uint16_t  *pBitRevTable;       /**< Bit reversal table. */
    uint16_t   twidCoefModifier;   /**< Twiddle coefficient modifier; lets FFTs of different sizes share one twiddle factor table. */
    uint16_t   bitRevFactor;       /**< Bit reversal modifier; lets FFTs of different sizes share one bit reversal table. */
    float32_t  onebyfftLen;        /**< The value 1/fftLen. */
  } csky_vdsp2_cfft_radix2_instance_f32;

  /**
   * @brief Instance data for the Radix-4 floating-point CFFT/CIFFT function.
   */
  typedef struct
  {
    uint16_t   fftLen;             /**< FFT length. */
    uint8_t    ifftFlag;           /**< 0 selects the forward transform, 1 the inverse transform. */
    uint8_t    bitReverseFlag;     /**< 1 enables bit reversal of the output, 0 disables it. */
    float32_t *pTwiddle;           /**< Twiddle factor table. */
    uint16_t  *pBitRevTable;       /**< Bit reversal table. */
    uint16_t   twidCoefModifier;   /**< Twiddle coefficient modifier; lets FFTs of different sizes share one twiddle factor table. */
    uint16_t   bitRevFactor;       /**< Bit reversal modifier; lets FFTs of different sizes share one bit reversal table. */
    float32_t  onebyfftLen;        /**< The value 1/fftLen. */
  } csky_vdsp2_cfft_radix4_instance_f32;

  /**
   * @brief Instance data for the Q15 fixed-point CFFT/CIFFT function.
   */
  typedef struct
  {
    uint16_t        fftLen;         /**< FFT length. */
    const q15_t    *pTwiddle;       /**< Twiddle factor table (read-only). */
    const uint16_t *pBitRevTable;   /**< Bit reversal table (read-only). */
    uint16_t        bitRevLength;   /**< Length of the bit reversal table. */
  } csky_vdsp2_cfft_instance_q15;

/**
 * @brief Q15 CFFT/CIFFT routine; takes the FFT instance read-only.
 * @note  Declaration only. p1 is presumably the complex data buffer that is
 *        transformed in place; ifftFlag and bitReverseFlag presumably select
 *        the inverse transform and output bit reversal -- confirm with the
 *        implementation.
 */
void csky_vdsp2_cfft_q15(
    const csky_vdsp2_cfft_instance_q15 *S,
    q15_t *p1,
    uint8_t ifftFlag,
    uint8_t bitReverseFlag);

/**
 * @brief "Fast" variant of the Q15 CFFT/CIFFT routine (same signature).
 * @note  What the fast variant trades off is not visible from this header.
 */
void csky_vdsp2_cfft_fast_q15(
    const csky_vdsp2_cfft_instance_q15 *S,
    q15_t *p1,
    uint8_t ifftFlag,
    uint8_t bitReverseFlag);

  /**
   * @brief Instance data for the Q31 fixed-point CFFT/CIFFT function.
   */
  typedef struct
  {
    uint16_t        fftLen;         /**< FFT length. */
    const q31_t    *pTwiddle;       /**< Twiddle factor table (read-only). */
    const uint16_t *pBitRevTable;   /**< Bit reversal table (read-only). */
    uint16_t        bitRevLength;   /**< Length of the bit reversal table. */
  } csky_vdsp2_cfft_instance_q31;

/**
 * @brief Q31 CFFT/CIFFT routine; takes the FFT instance read-only.
 * @note  Declaration only. p1 is presumably the complex data buffer that is
 *        transformed in place; ifftFlag and bitReverseFlag presumably select
 *        the inverse transform and output bit reversal -- confirm with the
 *        implementation.
 */
void csky_vdsp2_cfft_q31(
    const csky_vdsp2_cfft_instance_q31 *S,
    q31_t *p1,
    uint8_t ifftFlag,
    uint8_t bitReverseFlag);

/**
 * @brief "Fast" variant of the Q31 CFFT/CIFFT routine (same signature).
 * @note  What the fast variant trades off is not visible from this header.
 */
void csky_vdsp2_cfft_fast_q31(
    const csky_vdsp2_cfft_instance_q31 *S,
    q31_t *p1,
    uint8_t ifftFlag,
    uint8_t bitReverseFlag);

  /**
   * @brief Instance data for the floating-point CFFT/CIFFT function.
   */
  typedef struct
  {
    uint16_t         fftLen;         /**< FFT length. */
    const float32_t *pTwiddle;       /**< Twiddle factor table (read-only). */
    const uint16_t  *pBitRevTable;   /**< Bit reversal table (read-only). */
    uint16_t         bitRevLength;   /**< Length of the bit reversal table. */
  } csky_vdsp2_cfft_instance_f32;

  /**
   * @brief Floating-point CFFT/CIFFT routine; takes the FFT instance read-only.
   * @note  Declaration only. p1 is presumably the complex data buffer that is
   *        transformed in place; ifftFlag and bitReverseFlag presumably select
   *        the inverse transform and output bit reversal -- confirm with the
   *        implementation.
   */
  void csky_vdsp2_cfft_f32(
      const csky_vdsp2_cfft_instance_f32 *S,
      float32_t *p1,
      uint8_t ifftFlag,
      uint8_t bitReverseFlag);

  /**
   * @brief Instance data for the Q15 RFFT/RIFFT function.
   */
  typedef struct
  {
    uint32_t  fftLenReal;           /**< Length of the real FFT. */
    uint8_t   ifftFlagR;            /**< 0 selects the forward transform, 1 the inverse transform. */
    uint8_t   bitReverseFlagR;      /**< 1 enables bit reversal of the output, 0 disables it. */
    uint32_t  twidCoefRModifier;    /**< Twiddle coefficient modifier; lets FFTs of different sizes share one twiddle factor table. */
    q15_t    *pTwiddleAReal;        /**< Real twiddle factor table. */
    const csky_vdsp2_cfft_instance_q15 *pCfft;   /**< Complex FFT instance (read-only). */
  } csky_vdsp2_rfft_instance_q15;

  /**
   * @brief Instance data for the "fast" Q15 RFFT/RIFFT function.
   *
   * Compared with csky_vdsp2_rfft_instance_q15 it carries a second (B) twiddle
   * table and has no twiddle coefficient modifier.
   */
  typedef struct
  {
    uint32_t  fftLenReal;        /**< Length of the real FFT. */
    uint8_t   ifftFlagR;         /**< 0 selects the forward transform, 1 the inverse transform. */
    uint8_t   bitReverseFlagR;   /**< 1 enables bit reversal of the output, 0 disables it. */
    q15_t    *pTwiddleAReal;     /**< A real twiddle factor table. */
    q15_t    *pTwiddleBReal;     /**< B real twiddle factor table. */
    const csky_vdsp2_cfft_instance_q15 *pCfft;   /**< Complex FFT instance (read-only). */
  } csky_vdsp2_rfft_fast_instance_q15;

  /**
   * @brief  Initialisation routine for a Q15 RFFT/RIFFT instance.
   * @return A csky_vdsp2_status code; the failure conditions are not visible
   *         from this header.
   * @note   The flags are uint32_t here, whereas the matching instance fields
   *         (ifftFlagR, bitReverseFlagR) are uint8_t.
   */
  csky_vdsp2_status csky_vdsp2_rfft_init_q15(
      csky_vdsp2_rfft_instance_q15 *S,
      uint32_t fftLenReal,
      uint32_t ifftFlagR,
      uint32_t bitReverseFlag);

  /**
   * @brief Q15 RFFT/RIFFT routine; takes the instance read-only.
   * @note  Declaration only. pSrc/pDst are presumably the input and output
   *        buffers; their required sizes are not visible from this header.
   */
  void csky_vdsp2_rfft_q15(
      const csky_vdsp2_rfft_instance_q15 *S,
      q15_t *pSrc,
      q15_t *pDst);

  /**
   * @brief "Fast" Q15 RFFT/RIFFT routine; operates on the fast instance type.
   * @note  Declaration only. pSrc/pDst are presumably the input and output
   *        buffers; their required sizes are not visible from this header.
   */
  void csky_vdsp2_rfft_fast_q15(
      const csky_vdsp2_rfft_fast_instance_q15 *S,
      q15_t *pSrc,
      q15_t *pDst);

  /**
   * @brief Instance data for the Q31 RFFT/RIFFT function.
   */
  typedef struct
  {
    uint32_t  fftLenReal;           /**< Length of the real FFT. */
    uint8_t   ifftFlagR;            /**< 0 selects the forward transform, 1 the inverse transform. */
    uint8_t   bitReverseFlagR;      /**< 1 enables bit reversal of the output, 0 disables it. */
    uint32_t  twidCoefRModifier;    /**< Twiddle coefficient modifier; lets FFTs of different sizes share one twiddle factor table. */
    q31_t    *pTwiddleAReal;        /**< Real twiddle factor table. */
    const csky_vdsp2_cfft_instance_q31 *pCfft;   /**< Complex FFT instance (read-only). */
  } csky_vdsp2_rfft_instance_q31;

  /**
   * @brief Instance data for the "fast" Q31 RFFT/RIFFT function.
   *
   * Compared with csky_vdsp2_rfft_instance_q31 it carries a second (B) twiddle
   * table and has no twiddle coefficient modifier.
   */
  typedef struct
  {
    uint32_t  fftLenReal;        /**< Length of the real FFT. */
    uint8_t   ifftFlagR;         /**< 0 selects the forward transform, 1 the inverse transform. */
    uint8_t   bitReverseFlagR;   /**< 1 enables bit reversal of the output, 0 disables it. */
    q31_t    *pTwiddleAReal;     /**< A real twiddle factor table. */
    q31_t    *pTwiddleBReal;     /**< B real twiddle factor table. */
    const csky_vdsp2_cfft_instance_q31 *pCfft;   /**< Complex FFT instance (read-only). */
  } csky_vdsp2_rfft_fast_instance_q31;

  /**
   * @brief  Initialisation routine for a Q31 RFFT/RIFFT instance.
   * @return A csky_vdsp2_status code; the failure conditions are not visible
   *         from this header.
   * @note   The flags are uint32_t here, whereas the matching instance fields
   *         (ifftFlagR, bitReverseFlagR) are uint8_t.
   */
  csky_vdsp2_status csky_vdsp2_rfft_init_q31(
      csky_vdsp2_rfft_instance_q31 *S,
      uint32_t fftLenReal,
      uint32_t ifftFlagR,
      uint32_t bitReverseFlag);

  /**
   * @brief Q31 RFFT/RIFFT routine; takes the instance read-only.
   * @note  Declaration only. pSrc/pDst are presumably the input and output
   *        buffers; their required sizes are not visible from this header.
   */
  void csky_vdsp2_rfft_q31(
      const csky_vdsp2_rfft_instance_q31 *S,
      q31_t *pSrc,
      q31_t *pDst);

  /**
   * @brief "Fast" Q31 RFFT/RIFFT routine; operates on the fast instance type.
   * @note  Declaration only. pSrc/pDst are presumably the input and output
   *        buffers; their required sizes are not visible from this header.
   */
  void csky_vdsp2_rfft_fast_q31(
      const csky_vdsp2_rfft_fast_instance_q31 *S,
      q31_t *pSrc,
      q31_t *pDst);

  /**
   * @brief Instance data for the floating-point RFFT/RIFFT function.
   */
  typedef struct
  {
    uint32_t   fftLenReal;           /**< Length of the real FFT. */
    uint16_t   fftLenBy2;            /**< Length of the complex FFT. */
    uint8_t    ifftFlagR;            /**< 0 selects the forward transform, 1 the inverse transform. */
    uint8_t    bitReverseFlagR;      /**< 1 enables bit reversal of the output, 0 disables it. */
    uint32_t   twidCoefRModifier;    /**< Twiddle coefficient modifier; lets FFTs of different sizes share one twiddle factor table. */
    float32_t *pTwiddleAReal;        /**< Real twiddle factor table. */
    float32_t *pTwiddleBReal;        /**< Imaginary twiddle factor table. */
    csky_vdsp2_cfft_radix4_instance_f32 *pCfft;   /**< Complex (Radix-4) FFT instance. */
  } csky_vdsp2_rfft_instance_f32;

  /**
   * @brief  Initialisation routine for a floating-point RFFT/RIFFT instance.
   * @return A csky_vdsp2_status code; the failure conditions are not visible
   *         from this header.
   * @note   S_CFFT has the same type as the instance's pCfft field and is
   *         presumably linked into S -- confirm with the implementation.
   */
  csky_vdsp2_status csky_vdsp2_rfft_init_f32(
      csky_vdsp2_rfft_instance_f32 *S,
      csky_vdsp2_cfft_radix4_instance_f32 *S_CFFT,
      uint32_t fftLenReal,
      uint32_t ifftFlagR,
      uint32_t bitReverseFlag);

  /*
   * Radix-specific floating-point CFFT routines.  All three take the generic
   * csky_vdsp2_cfft_instance_f32 (not the radix2/radix4 instance structs) plus
   * an explicit onebyfftLen argument, which is presumably 1/fftLen -- confirm
   * with the implementation.  p1 is presumably transformed in place.
   */

  /** @brief Radix-4 floating-point CFFT routine. */
  void csky_vdsp2_cfft_radix4_f32(
      const csky_vdsp2_cfft_instance_f32 *S,
      float32_t *p1,
      uint8_t ifftFlag,
      uint8_t bitReverseFlag,
      float32_t onebyfftLen);

  /** @brief "Fast" Radix-4 floating-point CFFT routine (same signature). */
  void csky_vdsp2_cfft_fast_radix4_f32(
      const csky_vdsp2_cfft_instance_f32 *S,
      float32_t *p1,
      uint8_t ifftFlag,
      uint8_t bitReverseFlag,
      float32_t onebyfftLen);

  /** @brief Radix-2 floating-point CFFT routine. */
  void csky_vdsp2_cfft_radix2_f32(
      const csky_vdsp2_cfft_instance_f32 *S,
      float32_t *p1,
      uint8_t ifftFlag,
      uint8_t bitReverseFlag,
      float32_t onebyfftLen);

  /**
   * @brief Floating-point RFFT/RIFFT routine; takes the instance read-only.
   * @note  Declaration only. pSrc/pDst are presumably the input and output
   *        buffers; their required sizes are not visible from this header.
   */
  void csky_vdsp2_rfft_f32(
      const csky_vdsp2_rfft_instance_f32 *S,
      float32_t *pSrc,
      float32_t *pDst);

  /**
   * @brief Instance data for the "fast" floating-point RFFT/RIFFT function.
   */
  typedef struct
  {
    csky_vdsp2_cfft_instance_f32  Sint;           /**< Internal CFFT structure (embedded by value). */
    uint16_t                      fftLenRFFT;     /**< Length of the real sequence. */
    float32_t                    *pTwiddleRFFT;   /**< Twiddle factors of the real stage. */
  } csky_vdsp2_rfft_fast_instance_f32;

/**
 * @brief  Initialisation routine for a "fast" floating-point RFFT instance.
 * @return A csky_vdsp2_status code; which fftLen values are accepted is not
 *         visible from this header.
 */
csky_vdsp2_status csky_vdsp2_rfft_fast_init_f32(
    csky_vdsp2_rfft_fast_instance_f32 *S,
    uint16_t fftLen);

/**
 * @brief "Fast" floating-point RFFT/RIFFT routine.
 * @note  Declaration only. The instance is passed non-const. p/pOut are
 *        presumably the input and output buffers and ifftFlag presumably
 *        selects the inverse transform -- confirm with the implementation.
 */
void csky_vdsp2_rfft_fast_f32(
    csky_vdsp2_rfft_fast_instance_f32 *S,
    float32_t *p,
    float32_t *pOut,
    uint8_t ifftFlag);

  /**
   * @brief Instance structure for the floating-point DCT4/IDCT4 function.
   *
   * The real FFT is referenced through the fast RFFT instance type
   * (csky_vdsp2_rfft_fast_instance_f32); all FFT state and tables are held by
   * pointer, not by value.
   */
  typedef struct
  {
    uint16_t N;                           /**< length of the DCT4. */
    uint16_t Nby2;                        /**< half of the length of the DCT4. */
    float32_t normalize;                  /**< normalizing factor. */
    float32_t *pTwiddle;                  /**< points to the twiddle factor table. */
    float32_t *pCosFactor;                /**< points to the cosFactor table. */
    csky_vdsp2_rfft_fast_instance_f32 *pRfft;   /**< points to the real FFT fast instance. */
    csky_vdsp2_cfft_radix4_instance_f32 *pCfft; /**< points to the complex FFT instance. */
  } csky_vdsp2_dct4_instance_f32;

  /**
   * @brief Prototype of the initialization routine for the floating-point DCT4/IDCT4.
   *
   * NOTE(review): parameter roles are inferred from the identically named
   * fields of csky_vdsp2_dct4_instance_f32 -- confirm against the implementation.
   *
   * @param S          DCT4 instance (presumably filled in by this call).
   * @param S_RFFT     fast real FFT instance (presumably stored as S->pRfft).
   * @param S_CFFT     radix-4 complex FFT instance (presumably stored as S->pCfft).
   * @param N          length of the DCT4.
   * @param Nby2       half of the length of the DCT4.
   * @param normalize  normalizing factor.
   * @return a csky_vdsp2_status value; the individual codes are defined elsewhere.
   */
  csky_vdsp2_status csky_vdsp2_dct4_init_f32(
  csky_vdsp2_dct4_instance_f32 * S,
  csky_vdsp2_rfft_fast_instance_f32 * S_RFFT,
  csky_vdsp2_cfft_radix4_instance_f32 * S_CFFT,
  uint16_t N,
  uint16_t Nby2,
  float32_t normalize);

  /**
   * @brief Prototype of the floating-point DCT4/IDCT4 processing routine.
   *
   * @param S              DCT4 instance (read-only pointer).
   * @param pState         state/working buffer (presumed -- TODO confirm required length).
   * @param pInlineBuffer  presumably the in-place input/output buffer -- TODO confirm.
   */
  void csky_vdsp2_dct4_f32(
  const csky_vdsp2_dct4_instance_f32 * S,
  float32_t * pState,
  float32_t * pInlineBuffer);


  /**
   * @brief Instance structure for the Q31 DCT4/IDCT4 function.
   *
   * This variant references a csky_vdsp2_rfft_instance_q31; the otherwise
   * identical csky_vdsp2_dct4_fast_instance_q31 references the fast RFFT
   * instance type instead.
   */
  typedef struct
  {
    uint16_t N;                           /**< length of the DCT4. */
    uint16_t Nby2;                        /**< half of the length of the DCT4. */
    q31_t normalize;                      /**< normalizing factor. */
    q31_t *pTwiddle;                      /**< points to the twiddle factor table. */
    q31_t *pCosFactor;                    /**< points to the cosFactor table. */
    csky_vdsp2_rfft_instance_q31 *pRfft;        /**< points to the real FFT instance. */
    csky_vdsp2_cfft_radix4_instance_q31 *pCfft; /**< points to the complex FFT instance. */
  } csky_vdsp2_dct4_instance_q31;

  /**
   * @brief Instance structure for the fast Q31 DCT4/IDCT4 function.
   *
   * Field-for-field the same as csky_vdsp2_dct4_instance_q31 except that pRfft
   * points to a csky_vdsp2_rfft_fast_instance_q31.
   */
  typedef struct
  {
    uint16_t N;                           /**< length of the DCT4. */
    uint16_t Nby2;                        /**< half of the length of the DCT4. */
    q31_t normalize;                      /**< normalizing factor. */
    q31_t *pTwiddle;                      /**< points to the twiddle factor table. */
    q31_t *pCosFactor;                    /**< points to the cosFactor table. */
    csky_vdsp2_rfft_fast_instance_q31 *pRfft;        /**< points to the real FFT fast instance. */
    csky_vdsp2_cfft_radix4_instance_q31 *pCfft; /**< points to the complex FFT instance. */
  } csky_vdsp2_dct4_fast_instance_q31;

  /**
   * @brief Prototype of the initialization routine for the Q31 DCT4/IDCT4.
   *
   * NOTE(review): parameter roles are inferred from the identically named
   * fields of csky_vdsp2_dct4_instance_q31 -- confirm against the implementation.
   *
   * @param S          DCT4 instance (presumably filled in by this call).
   * @param S_RFFT     real FFT instance (presumably stored as S->pRfft).
   * @param S_CFFT     radix-4 complex FFT instance (presumably stored as S->pCfft).
   * @param N          length of the DCT4.
   * @param Nby2       half of the length of the DCT4.
   * @param normalize  normalizing factor.
   * @return a csky_vdsp2_status value; the individual codes are defined elsewhere.
   */
  csky_vdsp2_status csky_vdsp2_dct4_init_q31(
  csky_vdsp2_dct4_instance_q31 * S,
  csky_vdsp2_rfft_instance_q31 * S_RFFT,
  csky_vdsp2_cfft_radix4_instance_q31 * S_CFFT,
  uint16_t N,
  uint16_t Nby2,
  q31_t normalize);

  /**
   * @brief Prototype of the Q31 DCT4/IDCT4 processing routine.
   *
   * @param S              DCT4 instance (read-only pointer).
   * @param pState         state/working buffer (presumed -- TODO confirm required length).
   * @param pInlineBuffer  presumably the in-place input/output buffer -- TODO confirm.
   */
  void csky_vdsp2_dct4_q31(
  const csky_vdsp2_dct4_instance_q31 * S,
  q31_t * pState,
  q31_t * pInlineBuffer);

  /**
   * @brief Prototype of the fast Q31 DCT4/IDCT4 processing routine.
   *
   * Same parameter layout as csky_vdsp2_dct4_q31 but operating on a
   * csky_vdsp2_dct4_fast_instance_q31. No matching init prototype for the fast
   * instance type appears next to this declaration.
   */
  void csky_vdsp2_dct4_fast_q31(
  const csky_vdsp2_dct4_fast_instance_q31 * S,
  q31_t * pState,
  q31_t * pInlineBuffer);

  /**
   * @brief Instance structure for the Q15 DCT4/IDCT4 function.
   *
   * This variant references a csky_vdsp2_rfft_instance_q15; the otherwise
   * identical csky_vdsp2_dct4_fast_instance_q15 references the fast RFFT
   * instance type instead.
   */
  typedef struct
  {
    uint16_t N;                           /**< length of the DCT4. */
    uint16_t Nby2;                        /**< half of the length of the DCT4. */
    q15_t normalize;                      /**< normalizing factor. */
    q15_t *pTwiddle;                      /**< points to the twiddle factor table. */
    q15_t *pCosFactor;                    /**< points to the cosFactor table. */
    csky_vdsp2_rfft_instance_q15 *pRfft;        /**< points to the real FFT instance. */
    csky_vdsp2_cfft_radix4_instance_q15 *pCfft; /**< points to the complex FFT instance. */
  } csky_vdsp2_dct4_instance_q15;

  /**
   * @brief Instance structure for the fast Q15 DCT4/IDCT4 function.
   *
   * Field-for-field the same as csky_vdsp2_dct4_instance_q15 except that pRfft
   * points to a csky_vdsp2_rfft_fast_instance_q15.
   */
  typedef struct
  {
    uint16_t N;                           /**< length of the DCT4. */
    uint16_t Nby2;                        /**< half of the length of the DCT4. */
    q15_t normalize;                      /**< normalizing factor. */
    q15_t *pTwiddle;                      /**< points to the twiddle factor table. */
    q15_t *pCosFactor;                    /**< points to the cosFactor table. */
    csky_vdsp2_rfft_fast_instance_q15 *pRfft;        /**< points to the real FFT fast instance. */
    csky_vdsp2_cfft_radix4_instance_q15 *pCfft; /**< points to the complex FFT instance. */
  } csky_vdsp2_dct4_fast_instance_q15;

  /**
   * @brief Prototype of the initialization routine for the Q15 DCT4/IDCT4.
   *
   * NOTE(review): parameter roles are inferred from the identically named
   * fields of csky_vdsp2_dct4_instance_q15 -- confirm against the implementation.
   *
   * @param S          DCT4 instance (presumably filled in by this call).
   * @param S_RFFT     real FFT instance (presumably stored as S->pRfft).
   * @param S_CFFT     radix-4 complex FFT instance (presumably stored as S->pCfft).
   * @param N          length of the DCT4.
   * @param Nby2       half of the length of the DCT4.
   * @param normalize  normalizing factor.
   * @return a csky_vdsp2_status value; the individual codes are defined elsewhere.
   */
  csky_vdsp2_status csky_vdsp2_dct4_init_q15(
  csky_vdsp2_dct4_instance_q15 * S,
  csky_vdsp2_rfft_instance_q15 * S_RFFT,
  csky_vdsp2_cfft_radix4_instance_q15 * S_CFFT,
  uint16_t N,
  uint16_t Nby2,
  q15_t normalize);

  /**
   * @brief Prototype of the Q15 DCT4/IDCT4 processing routine.
   *
   * @param S              DCT4 instance (read-only pointer).
   * @param pState         state/working buffer (presumed -- TODO confirm required length).
   * @param pInlineBuffer  presumably the in-place input/output buffer -- TODO confirm.
   */
  void csky_vdsp2_dct4_q15(
  const csky_vdsp2_dct4_instance_q15 * S,
  q15_t * pState,
  q15_t * pInlineBuffer);

  /**
   * @brief Prototype of the fast Q15 DCT4/IDCT4 processing routine.
   *
   * Same parameter layout as csky_vdsp2_dct4_q15 but operating on a
   * csky_vdsp2_dct4_fast_instance_q15. No matching init prototype for the fast
   * instance type appears next to this declaration.
   */
  void csky_vdsp2_dct4_fast_q15(
  const csky_vdsp2_dct4_fast_instance_q15 * S,
  q15_t * pState,
  q15_t * pInlineBuffer);

  /**
   * @brief Prototypes of the csky_vdsp2_add_* family (f32, Q7, Q15, Q31).
   *
   * All four share one parameter layout; only the element type differs.
   * NOTE(review): presumably an element-wise vector addition; the definitions
   * are not in this header, so overflow/saturation behavior of the fixed-point
   * variants must be confirmed against the implementation.
   *
   * @param pSrcA      first input vector (presumed).
   * @param pSrcB      second input vector (presumed).
   * @param pDst       output vector (presumed).
   * @param blockSize  number of samples in each vector (presumed).
   */
  void csky_vdsp2_add_f32(
  float32_t * pSrcA,
  float32_t * pSrcB,
  float32_t * pDst,
  uint32_t blockSize);

  void csky_vdsp2_add_q7(
  q7_t * pSrcA,
  q7_t * pSrcB,
  q7_t * pDst,
  uint32_t blockSize);

  void csky_vdsp2_add_q15(
  q15_t * pSrcA,
  q15_t * pSrcB,
  q15_t * pDst,
  uint32_t blockSize);

  void csky_vdsp2_add_q31(
  q31_t * pSrcA,
  q31_t * pSrcB,
  q31_t * pDst,
  uint32_t blockSize);

  /**
   * @brief Prototypes of the csky_vdsp2_sub_* family (f32, Q7, Q15, Q31).
   *
   * All four share one parameter layout; only the element type differs.
   * NOTE(review): presumably an element-wise vector subtraction (pSrcA minus
   * pSrcB); operand order and fixed-point saturation behavior must be
   * confirmed against the implementation.
   *
   * @param pSrcA      first input vector (presumed).
   * @param pSrcB      second input vector (presumed).
   * @param pDst       output vector (presumed).
   * @param blockSize  number of samples in each vector (presumed).
   */
  void csky_vdsp2_sub_f32(
  float32_t * pSrcA,
  float32_t * pSrcB,
  float32_t * pDst,
  uint32_t blockSize);

  void csky_vdsp2_sub_q7(
  q7_t * pSrcA,
  q7_t * pSrcB,
  q7_t * pDst,
  uint32_t blockSize);

  void csky_vdsp2_sub_q15(
  q15_t * pSrcA,
  q15_t * pSrcB,
  q15_t * pDst,
  uint32_t blockSize);

  void csky_vdsp2_sub_q31(
  q31_t * pSrcA,
  q31_t * pSrcB,
  q31_t * pDst,
  uint32_t blockSize);

  /**
   * @brief Prototype of the Q15 sum routine.
   *
   * The result is delivered through a q63_t pointer, i.e. in a wider type than
   * the q15_t input elements.
   * NOTE(review): presumably sums all blockSize elements of pSrcA into *pDst --
   * confirm against the implementation.
   *
   * @param pSrcA      input vector (presumed).
   * @param pDst       location receiving the result (presumed single value).
   * @param blockSize  number of input samples (presumed).
   */
  void csky_vdsp2_sum_q15(
  q15_t * pSrcA,
  q63_t * pDst,
  uint32_t blockSize);

  /**
   * @brief Prototypes of the csky_vdsp2_scale_* family (f32, Q7, Q15, Q31).
   *
   * The floating-point variant takes a single scale factor; the fixed-point
   * variants take a fractional factor (scaleFract) plus a signed shift count.
   * NOTE(review): presumably multiplies every element by the scale value; how
   * scaleFract and shift combine is not visible here -- confirm against the
   * implementation.
   *
   * @param pSrc       input vector (presumed).
   * @param scale      scale factor (f32 variant only).
   * @param scaleFract fractional part of the scale factor (fixed-point variants).
   * @param shift      signed shift count (fixed-point variants).
   * @param pDst       output vector (presumed).
   * @param blockSize  number of samples (presumed).
   */
  void csky_vdsp2_scale_f32(
  float32_t * pSrc,
  float32_t scale,
  float32_t * pDst,
  uint32_t blockSize);

  void csky_vdsp2_scale_q7(
  q7_t * pSrc,
  q7_t scaleFract,
  int8_t shift,
  q7_t * pDst,
  uint32_t blockSize);

  void csky_vdsp2_scale_q15(
  q15_t * pSrc,
  q15_t scaleFract,
  int8_t shift,
  q15_t * pDst,
  uint32_t blockSize);

  void csky_vdsp2_scale_q31(
  q31_t * pSrc,
  q31_t scaleFract,
  int8_t shift,
  q31_t * pDst,
  uint32_t blockSize);

  /**
   * @brief Prototypes of the csky_vdsp2_abs_* family (Q7, f32, Q15, Q31).
   *
   * All four share one parameter layout; only the element type differs.
   * NOTE(review): presumably an element-wise absolute value; handling of the
   * most negative fixed-point value must be confirmed against the implementation.
   *
   * @param pSrc       input vector (presumed).
   * @param pDst       output vector (presumed).
   * @param blockSize  number of samples (presumed).
   */
  void csky_vdsp2_abs_q7(
  q7_t * pSrc,
  q7_t * pDst,
  uint32_t blockSize);

  void csky_vdsp2_abs_f32(
  float32_t * pSrc,
  float32_t * pDst,
  uint32_t blockSize);

  void csky_vdsp2_abs_q15(
  q15_t * pSrc,
  q15_t * pDst,
  uint32_t blockSize);

  void csky_vdsp2_abs_q31(
  q31_t * pSrc,
  q31_t * pDst,
  uint32_t blockSize);

  /**
   * @brief Prototypes of the csky_vdsp2_abs_max_* routines (Q15 and Q31 only).
   *
   * NOTE(review): presumably finds the maximum absolute value of the input;
   * whether pDst receives one value or a vector is not visible here -- confirm
   * against the implementation.
   *
   * @param pSrc       input vector (presumed).
   * @param pDst       location receiving the result (presumed).
   * @param blockSize  number of input samples (presumed).
   */
  void csky_vdsp2_abs_max_q15(
  q15_t * pSrc,
  q15_t * pDst,
  uint32_t blockSize);

  void csky_vdsp2_abs_max_q31(
  q31_t * pSrc,
  q31_t * pDst,
  uint32_t blockSize);


  /**
   * @brief Prototypes of the csky_vdsp2_dot_prod_* family.
   *
   * Result types as declared: f32 -> float32_t, Q7 -> q31_t, Q15 -> q63_t,
   * Q31 -> q63_t, u64xu8 -> uint64_t.
   * NOTE(review): presumably computes the dot product of pSrcA and pSrcB over
   * blockSize elements and stores it in *result; accumulator format and
   * scaling must be confirmed against the implementation.
   *
   * @param pSrcA      first input vector (presumed).
   * @param pSrcB      second input vector (presumed).
   * @param blockSize  number of samples in each vector (presumed).
   * @param result     location receiving the result (presumed).
   */
  void csky_vdsp2_dot_prod_f32(
  float32_t * pSrcA,
  float32_t * pSrcB,
  uint32_t blockSize,
  float32_t * result);

  void csky_vdsp2_dot_prod_q7(
  q7_t * pSrcA,
  q7_t * pSrcB,
  uint32_t blockSize,
  q31_t * result);

  void csky_vdsp2_dot_prod_q15(
  q15_t * pSrcA,
  q15_t * pSrcB,
  uint32_t blockSize,
  q63_t * result);

  void csky_vdsp2_dot_prod_q31(
  q31_t * pSrcA,
  q31_t * pSrcB,
  uint32_t blockSize,
  q63_t * result);

  /* Mixed-width variant: pSrcA holds uint8_t elements, pSrcB holds uint64_t elements. */
  void csky_vdsp2_dot_prod_u64xu8(
  uint8_t  * pSrcA,
  uint64_t * pSrcB,
  uint32_t blockSize,
  uint64_t * result);

  /**
   * @brief Prototypes of the csky_vdsp2_shift_* family (Q7, Q15, Q31).
   *
   * shiftBits is a signed 8-bit count in every variant.
   * NOTE(review): presumably shifts each element by shiftBits, with the sign
   * selecting the direction; direction convention and saturation must be
   * confirmed against the implementation.
   *
   * @param pSrc       input vector (presumed).
   * @param shiftBits  signed shift count.
   * @param pDst       output vector (presumed).
   * @param blockSize  number of samples (presumed).
   */
  void csky_vdsp2_shift_q7(
  q7_t * pSrc,
  int8_t shiftBits,
  q7_t * pDst,
  uint32_t blockSize);

  void csky_vdsp2_shift_q15(
  q15_t * pSrc,
  int8_t shiftBits,
  q15_t * pDst,
  uint32_t blockSize);

  void csky_vdsp2_shift_q31(
  q31_t * pSrc,
  int8_t shiftBits,
  q31_t * pDst,
  uint32_t blockSize);

  /**
   * @brief Prototypes of the csky_vdsp2_offset_* family (f32, Q7, Q15, Q31).
   *
   * All four share one parameter layout; only the element type differs.
   * NOTE(review): presumably adds the constant offset to every element;
   * fixed-point saturation behavior must be confirmed against the implementation.
   *
   * @param pSrc       input vector (presumed).
   * @param offset     constant value, passed by value in the element type.
   * @param pDst       output vector (presumed).
   * @param blockSize  number of samples (presumed).
   */
  void csky_vdsp2_offset_f32(
  float32_t * pSrc,
  float32_t offset,
  float32_t * pDst,
  uint32_t blockSize);

  void csky_vdsp2_offset_q7(
  q7_t * pSrc,
  q7_t offset,
  q7_t * pDst,
  uint32_t blockSize);

  void csky_vdsp2_offset_q15(
  q15_t * pSrc,
  q15_t offset,
  q15_t * pDst,
  uint32_t blockSize);

  void csky_vdsp2_offset_q31(
  q31_t * pSrc,
  q31_t offset,
  q31_t * pDst,
  uint32_t blockSize);

  /**
   * @brief Prototypes of the csky_vdsp2_negate_* family (f32, Q7, Q15, Q31).
   *
   * All four share one parameter layout; only the element type differs.
   * NOTE(review): presumably negates every element; handling of the most
   * negative fixed-point value must be confirmed against the implementation.
   *
   * @param pSrc       input vector (presumed).
   * @param pDst       output vector (presumed).
   * @param blockSize  number of samples (presumed).
   */
  void csky_vdsp2_negate_f32(
  float32_t * pSrc,
  float32_t * pDst,
  uint32_t blockSize);

  void csky_vdsp2_negate_q7(
  q7_t * pSrc,
  q7_t * pDst,
  uint32_t blockSize);

  void csky_vdsp2_negate_q15(
  q15_t * pSrc,
  q15_t * pDst,
  uint32_t blockSize);

  void csky_vdsp2_negate_q31(
  q31_t * pSrc,
  q31_t * pDst,
  uint32_t blockSize);

  /**
   * @brief Prototypes of the csky_vdsp2_copy_* family (f32, Q7, Q15, Q31).
   *
   * All four share one parameter layout; only the element type differs.
   * NOTE(review): presumably copies blockSize elements from pSrc to pDst;
   * whether overlapping buffers are supported is not visible here -- confirm.
   *
   * @param pSrc       source vector (presumed; not const-qualified).
   * @param pDst       destination vector (presumed).
   * @param blockSize  number of samples (presumed).
   */
  void csky_vdsp2_copy_f32(
  float32_t * pSrc,
  float32_t * pDst,
  uint32_t blockSize);

  void csky_vdsp2_copy_q7(
  q7_t * pSrc,
  q7_t * pDst,
  uint32_t blockSize);

  void csky_vdsp2_copy_q15(
  q15_t * pSrc,
  q15_t * pDst,
  uint32_t blockSize);

  void csky_vdsp2_copy_q31(
  q31_t * pSrc,
  q31_t * pDst,
  uint32_t blockSize);

  /**
   * @brief Prototypes of the csky_vdsp2_fill_* family (f32, Q7, Q15, Q31).
   *
   * All four share one parameter layout; only the element type differs.
   * NOTE(review): presumably writes value into blockSize elements of pDst --
   * confirm against the implementation.
   *
   * @param value      constant to store, passed by value in the element type.
   * @param pDst       destination vector (presumed).
   * @param blockSize  number of samples (presumed).
   */
  void csky_vdsp2_fill_f32(
  float32_t value,
  float32_t * pDst,
  uint32_t blockSize);

  void csky_vdsp2_fill_q7(
  q7_t value,
  q7_t * pDst,
  uint32_t blockSize);

  void csky_vdsp2_fill_q15(
  q15_t value,
  q15_t * pDst,
  uint32_t blockSize);

  void csky_vdsp2_fill_q31(
  q31_t value,
  q31_t * pDst,
  uint32_t blockSize);

  /**
   * @brief Prototype of the floating-point convolution routine.
   *
   * NOTE(review): presumably convolves the two input sequences; the required
   * length of pDst (commonly srcALen+srcBLen-1 for a full convolution) is not
   * stated in this header -- confirm against the implementation.
   *
   * @param pSrcA    first input sequence (presumed).
   * @param srcALen  length of the first input sequence.
   * @param pSrcB    second input sequence (presumed).
   * @param srcBLen  length of the second input sequence.
   * @param pDst     output buffer (presumed).
   */
  void csky_vdsp2_conv_f32(
  float32_t * pSrcA,
  uint32_t srcALen,
  float32_t * pSrcB,
  uint32_t srcBLen,
  float32_t * pDst);

  /**
   * @brief Prototypes of the Q15 convolution routines: opt, plain, fast and fast_opt.
   *
   * The plain and fast variants take inputs, lengths and output only; the
   * opt and fast_opt variants additionally take two caller-provided q15_t
   * scratch buffers.
   * NOTE(review): the accuracy/speed trade-off implied by "fast" and "opt", the
   * required scratch sizes and the output length are not visible here --
   * confirm against the implementation.
   *
   * @param pSrcA      first input sequence (presumed).
   * @param srcALen    length of the first input sequence.
   * @param pSrcB      second input sequence (presumed).
   * @param srcBLen    length of the second input sequence.
   * @param pDst       output buffer (presumed).
   * @param pScratch1  scratch buffer (opt variants only; size TODO confirm).
   * @param pScratch2  scratch buffer (opt variants only; size TODO confirm).
   */
  void csky_vdsp2_conv_opt_q15(
  q15_t * pSrcA,
  uint32_t srcALen,
  q15_t * pSrcB,
  uint32_t srcBLen,
  q15_t * pDst,
  q15_t * pScratch1,
  q15_t * pScratch2);

  void csky_vdsp2_conv_q15(
  q15_t * pSrcA,
  uint32_t srcALen,
  q15_t * pSrcB,
  uint32_t srcBLen,
  q15_t * pDst);

  void csky_vdsp2_conv_fast_q15(
          q15_t * pSrcA,
          uint32_t srcALen,
          q15_t * pSrcB,
          uint32_t srcBLen,
          q15_t * pDst);

  void csky_vdsp2_conv_fast_opt_q15(
  q15_t * pSrcA,
  uint32_t srcALen,
  q15_t * pSrcB,
  uint32_t srcBLen,
  q15_t * pDst,
  q15_t * pScratch1,
  q15_t * pScratch2);

  /**
   * @brief Prototypes of the Q31 convolution routines: plain and fast.
   *
   * Both share one parameter layout.
   * NOTE(review): how the fast variant differs (precision, scaling) and the
   * required output length are not visible here -- confirm against the
   * implementation.
   *
   * @param pSrcA    first input sequence (presumed).
   * @param srcALen  length of the first input sequence.
   * @param pSrcB    second input sequence (presumed).
   * @param srcBLen  length of the second input sequence.
   * @param pDst     output buffer (presumed).
   */
  void csky_vdsp2_conv_q31(
  q31_t * pSrcA,
  uint32_t srcALen,
  q31_t * pSrcB,
  uint32_t srcBLen,
  q31_t * pDst);

  void csky_vdsp2_conv_fast_q31(
  q31_t * pSrcA,
  uint32_t srcALen,
  q31_t * pSrcB,
  uint32_t srcBLen,
  q31_t * pDst);

  /**
   * @brief Prototypes of the Q7 convolution routines: opt and plain.
   *
   * The opt variant takes two caller-provided scratch buffers whose element
   * type is q15_t, i.e. wider than the q7_t data.
   * NOTE(review): required scratch sizes and output length are not visible
   * here -- confirm against the implementation.
   *
   * @param pSrcA      first input sequence (presumed).
   * @param srcALen    length of the first input sequence.
   * @param pSrcB      second input sequence (presumed).
   * @param srcBLen    length of the second input sequence.
   * @param pDst       output buffer (presumed).
   * @param pScratch1  q15_t scratch buffer (opt variant only; size TODO confirm).
   * @param pScratch2  q15_t scratch buffer (opt variant only; size TODO confirm).
   */
  void csky_vdsp2_conv_opt_q7(
  q7_t * pSrcA,
  uint32_t srcALen,
  q7_t * pSrcB,
  uint32_t srcBLen,
  q7_t * pDst,
  q15_t * pScratch1,
  q15_t * pScratch2);

  void csky_vdsp2_conv_q7(
  q7_t * pSrcA,
  uint32_t srcALen,
  q7_t * pSrcB,
  uint32_t srcBLen,
  q7_t * pDst);

  /**
   * @brief Prototype of the floating-point partial convolution routine.
   *
   * Unlike csky_vdsp2_conv_f32 this returns a csky_vdsp2_status and takes a
   * firstIndex/numPoints pair.
   * NOTE(review): presumably computes only numPoints output samples starting at
   * output index firstIndex, and reports an argument error through the return
   * value -- confirm against the implementation.
   *
   * @param pSrcA       first input sequence (presumed).
   * @param srcALen     length of the first input sequence.
   * @param pSrcB       second input sequence (presumed).
   * @param srcBLen     length of the second input sequence.
   * @param pDst        output buffer (presumed).
   * @param firstIndex  first output sample to compute (presumed).
   * @param numPoints   number of output samples to compute (presumed).
   * @return a csky_vdsp2_status value; the individual codes are defined elsewhere.
   */
  csky_vdsp2_status csky_vdsp2_conv_partial_f32(
  float32_t * pSrcA,
  uint32_t srcALen,
  float32_t * pSrcB,
  uint32_t srcBLen,
  float32_t * pDst,
  uint32_t firstIndex,
  uint32_t numPoints);

  /**
   * @brief Prototypes of the Q15 partial convolution routines: opt, plain,
   *        fast and fast_opt.
   *
   * The opt and fast_opt variants additionally take two caller-provided q15_t
   * scratch buffers; the other parameters are common to all four.
   * NOTE(review): presumably each computes numPoints output samples starting at
   * output index firstIndex; scratch sizes and status semantics must be
   * confirmed against the implementation.
   *
   * @param pSrcA       first input sequence (presumed).
   * @param srcALen     length of the first input sequence.
   * @param pSrcB       second input sequence (presumed).
   * @param srcBLen     length of the second input sequence.
   * @param pDst        output buffer (presumed).
   * @param firstIndex  first output sample to compute (presumed).
   * @param numPoints   number of output samples to compute (presumed).
   * @param pScratch1   scratch buffer (opt variants only; size TODO confirm).
   * @param pScratch2   scratch buffer (opt variants only; size TODO confirm).
   * @return a csky_vdsp2_status value; the individual codes are defined elsewhere.
   */
  csky_vdsp2_status csky_vdsp2_conv_partial_opt_q15(
  q15_t * pSrcA,
  uint32_t srcALen,
  q15_t * pSrcB,
  uint32_t srcBLen,
  q15_t * pDst,
  uint32_t firstIndex,
  uint32_t numPoints,
  q15_t * pScratch1,
  q15_t * pScratch2);

  csky_vdsp2_status csky_vdsp2_conv_partial_q15(
  q15_t * pSrcA,
  uint32_t srcALen,
  q15_t * pSrcB,
  uint32_t srcBLen,
  q15_t * pDst,
  uint32_t firstIndex,
  uint32_t numPoints);

  csky_vdsp2_status csky_vdsp2_conv_partial_fast_q15(
  q15_t * pSrcA,
  uint32_t srcALen,
  q15_t * pSrcB,
  uint32_t srcBLen,
  q15_t * pDst,
  uint32_t firstIndex,
  uint32_t numPoints);

  csky_vdsp2_status csky_vdsp2_conv_partial_fast_opt_q15(
  q15_t * pSrcA,
  uint32_t srcALen,
  q15_t * pSrcB,
  uint32_t srcBLen,
  q15_t * pDst,
  uint32_t firstIndex,
  uint32_t numPoints,
  q15_t * pScratch1,
  q15_t * pScratch2);

  /**
   * @brief Prototypes of the Q31 partial convolution routines: plain and fast.
   *
   * Both share one parameter layout.
   * NOTE(review): presumably each computes numPoints output samples starting at
   * output index firstIndex; status semantics must be confirmed against the
   * implementation.
   *
   * @param pSrcA       first input sequence (presumed).
   * @param srcALen     length of the first input sequence.
   * @param pSrcB       second input sequence (presumed).
   * @param srcBLen     length of the second input sequence.
   * @param pDst        output buffer (presumed).
   * @param firstIndex  first output sample to compute (presumed).
   * @param numPoints   number of output samples to compute (presumed).
   * @return a csky_vdsp2_status value; the individual codes are defined elsewhere.
   */
  csky_vdsp2_status csky_vdsp2_conv_partial_q31(
  q31_t * pSrcA,
  uint32_t srcALen,
  q31_t * pSrcB,
  uint32_t srcBLen,
  q31_t * pDst,
  uint32_t firstIndex,
  uint32_t numPoints);

  csky_vdsp2_status csky_vdsp2_conv_partial_fast_q31(
  q31_t * pSrcA,
  uint32_t srcALen,
  q31_t * pSrcB,
  uint32_t srcBLen,
  q31_t * pDst,
  uint32_t firstIndex,
  uint32_t numPoints);

  /**
   * @brief Prototypes of the Q7 partial convolution routines: opt and plain.
   *
   * The opt variant takes two caller-provided scratch buffers whose element
   * type is q15_t, i.e. wider than the q7_t data.
   * NOTE(review): presumably each computes numPoints output samples starting at
   * output index firstIndex; scratch sizes and status semantics must be
   * confirmed against the implementation.
   *
   * @param pSrcA       first input sequence (presumed).
   * @param srcALen     length of the first input sequence.
   * @param pSrcB       second input sequence (presumed).
   * @param srcBLen     length of the second input sequence.
   * @param pDst        output buffer (presumed).
   * @param firstIndex  first output sample to compute (presumed).
   * @param numPoints   number of output samples to compute (presumed).
   * @param pScratch1   q15_t scratch buffer (opt variant only; size TODO confirm).
   * @param pScratch2   q15_t scratch buffer (opt variant only; size TODO confirm).
   * @return a csky_vdsp2_status value; the individual codes are defined elsewhere.
   */
  csky_vdsp2_status csky_vdsp2_conv_partial_opt_q7(
  q7_t * pSrcA,
  uint32_t srcALen,
  q7_t * pSrcB,
  uint32_t srcBLen,
  q7_t * pDst,
  uint32_t firstIndex,
  uint32_t numPoints,
  q15_t * pScratch1,
  q15_t * pScratch2);

  csky_vdsp2_status csky_vdsp2_conv_partial_q7(
  q7_t * pSrcA,
  uint32_t srcALen,
  q7_t * pSrcB,
  uint32_t srcBLen,
  q7_t * pDst,
  uint32_t firstIndex,
  uint32_t numPoints);

  /**
   * @brief Instance structure for the Q15 FIR decimator.
   *
   * The coefficient and state arrays are referenced by pointer; this structure
   * does not contain them. M is stored in 8 bits and numTaps in 16 bits.
   */
  typedef struct
  {
    uint8_t M;                  /**< decimation factor. */
    uint16_t numTaps;           /**< number of coefficients in the filter. */
    q15_t *pCoeffs;             /**< points to the coefficient array. The array is of length numTaps.*/
    q15_t *pState;              /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
  } csky_vdsp2_fir_decimate_instance_q15;

  /**
   * @brief Instance structure for the Q31 FIR decimator.
   *
   * The coefficient and state arrays are referenced by pointer; this structure
   * does not contain them. M is stored in 8 bits and numTaps in 16 bits.
   */
  typedef struct
  {
    uint8_t M;                  /**< decimation factor. */
    uint16_t numTaps;           /**< number of coefficients in the filter. */
    q31_t *pCoeffs;             /**< points to the coefficient array. The array is of length numTaps.*/
    q31_t *pState;              /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
  } csky_vdsp2_fir_decimate_instance_q31;

  /**
   * @brief Instance structure for the floating-point FIR decimator.
   *
   * The coefficient and state arrays are referenced by pointer; this structure
   * does not contain them. M is stored in 8 bits and numTaps in 16 bits.
   */
  typedef struct
  {
    uint8_t M;                  /**< decimation factor. */
    uint16_t numTaps;           /**< number of coefficients in the filter. */
    float32_t *pCoeffs;         /**< points to the coefficient array. The array is of length numTaps.*/
    float32_t *pState;          /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
  } csky_vdsp2_fir_decimate_instance_f32;

  /**
   * @brief Prototype of the floating-point FIR decimator processing routine.
   *
   * NOTE(review): buffer roles are inferred from the names; presumably
   * blockSize counts input samples and blockSize/M samples are produced --
   * confirm against the implementation.
   *
   * @param S          decimator instance (read-only pointer).
   * @param pSrc       input block (presumed).
   * @param pDst       output block (presumed).
   * @param blockSize  number of samples to process per call (presumed input count).
   */
  void csky_vdsp2_fir_decimate_f32(
  const csky_vdsp2_fir_decimate_instance_f32 * S,
  float32_t * pSrc,
  float32_t * pDst,
  uint32_t blockSize);

  /**
   * @brief Prototype of the floating-point FIR decimator initialization routine.
   *
   * NOTE(review): parameter roles are inferred from the identically named
   * fields of csky_vdsp2_fir_decimate_instance_f32; the conditions under which
   * an error status is returned are not visible here -- confirm.
   *
   * @param S          decimator instance (presumably filled in by this call).
   * @param numTaps    number of coefficients in the filter.
   * @param M          decimation factor.
   * @param pCoeffs    coefficient array of length numTaps.
   * @param pState     state array of length numTaps+blockSize-1.
   * @param blockSize  number of samples processed per call (presumed).
   * @return a csky_vdsp2_status value; the individual codes are defined elsewhere.
   */
  csky_vdsp2_status csky_vdsp2_fir_decimate_init_f32(
  csky_vdsp2_fir_decimate_instance_f32 * S,
  uint16_t numTaps,
  uint8_t M,
  float32_t * pCoeffs,
  float32_t * pState,
  uint32_t blockSize);

  /**
   * @brief Prototypes of the Q15 FIR decimator processing routines (plain and fast).
   *
   * Both take the same parameters and a read-only instance pointer.
   * NOTE(review): how the fast variant differs, and whether blockSize counts
   * input samples, must be confirmed against the implementation.
   *
   * @param S          decimator instance (read-only pointer).
   * @param pSrc       input block (presumed).
   * @param pDst       output block (presumed).
   * @param blockSize  number of samples to process per call (presumed input count).
   */
  void csky_vdsp2_fir_decimate_q15(
  const csky_vdsp2_fir_decimate_instance_q15 * S,
  q15_t * pSrc,
  q15_t * pDst,
  uint32_t blockSize);

  void csky_vdsp2_fir_decimate_fast_q15(
  const csky_vdsp2_fir_decimate_instance_q15 * S,
  q15_t * pSrc,
  q15_t * pDst,
  uint32_t blockSize);

  /**
   * @brief Prototype of the Q15 FIR decimator initialization routine.
   *
   * NOTE(review): parameter roles are inferred from the identically named
   * fields of csky_vdsp2_fir_decimate_instance_q15 -- confirm.
   *
   * @param S          decimator instance (presumably filled in by this call).
   * @param numTaps    number of coefficients in the filter.
   * @param M          decimation factor.
   * @param pCoeffs    coefficient array of length numTaps.
   * @param pState     state array of length numTaps+blockSize-1.
   * @param blockSize  number of samples processed per call (presumed).
   * @return a csky_vdsp2_status value; the individual codes are defined elsewhere.
   */
  csky_vdsp2_status csky_vdsp2_fir_decimate_init_q15(
  csky_vdsp2_fir_decimate_instance_q15 * S,
  uint16_t numTaps,
  uint8_t M,
  q15_t * pCoeffs,
  q15_t * pState,
  uint32_t blockSize);

  /**
   * @brief Prototype of the Q31 FIR decimator processing routine.
   *
   * NOTE(review): buffer roles are inferred from the names -- confirm against
   * the implementation.
   *
   * @param S          decimator instance (read-only pointer).
   * @param pSrc       input block (presumed).
   * @param pDst       output block (presumed).
   * @param blockSize  number of samples to process per call (presumed input count).
   */
  void csky_vdsp2_fir_decimate_q31(
  const csky_vdsp2_fir_decimate_instance_q31 * S,
  q31_t * pSrc,
  q31_t * pDst,
  uint32_t blockSize);

  /**
   * @brief Prototype of the fast Q31 FIR decimator processing routine.
   *
   * NOTE(review): S is declared without const here, unlike every other
   * decimator processing prototype in this header. Left unchanged because the
   * prototype must match the definition, which is not visible from here --
   * confirm whether the instance is really modified.
   */
  void csky_vdsp2_fir_decimate_fast_q31(
  csky_vdsp2_fir_decimate_instance_q31 * S,
  q31_t * pSrc,
  q31_t * pDst,
  uint32_t blockSize);

  /**
   * @brief Prototype of the Q31 FIR decimator initialization routine.
   *
   * NOTE(review): parameter roles are inferred from the identically named
   * fields of csky_vdsp2_fir_decimate_instance_q31 -- confirm.
   *
   * @param S          decimator instance (presumably filled in by this call).
   * @param numTaps    number of coefficients in the filter.
   * @param M          decimation factor.
   * @param pCoeffs    coefficient array of length numTaps.
   * @param pState     state array of length numTaps+blockSize-1.
   * @param blockSize  number of samples processed per call (presumed).
   * @return a csky_vdsp2_status value; the individual codes are defined elsewhere.
   */
  csky_vdsp2_status csky_vdsp2_fir_decimate_init_q31(
  csky_vdsp2_fir_decimate_instance_q31 * S,
  uint16_t numTaps,
  uint8_t M,
  q31_t * pCoeffs,
  q31_t * pState,
  uint32_t blockSize);


  /**
   * @brief Instance structure for the Q15 FIR interpolator.
   *
   * The filter is described per polyphase component: the coefficient array
   * holds L*phaseLength entries in total. Both arrays are referenced by
   * pointer and are not part of this structure.
   */
  typedef struct
  {
    uint8_t L;                      /**< upsample factor. */
    uint16_t phaseLength;           /**< length of each polyphase filter component. */
    q15_t *pCoeffs;                 /**< points to the coefficient array. The array is of length L*phaseLength. */
    q15_t *pState;                  /**< points to the state variable array. The array is of length blockSize+phaseLength-1. */
  } csky_vdsp2_fir_interpolate_instance_q15;

  /**
   * @brief Instance structure for the Q31 FIR interpolator.
   *
   * The filter is described per polyphase component: the coefficient array
   * holds L*phaseLength entries in total. Both arrays are referenced by
   * pointer and are not part of this structure.
   */
  typedef struct
  {
    uint8_t L;                      /**< upsample factor. */
    uint16_t phaseLength;           /**< length of each polyphase filter component. */
    q31_t *pCoeffs;                 /**< points to the coefficient array. The array is of length L*phaseLength. */
    q31_t *pState;                  /**< points to the state variable array. The array is of length blockSize+phaseLength-1. */
  } csky_vdsp2_fir_interpolate_instance_q31;

  /**
   * @brief Instance structure for the floating-point FIR interpolator.
   *
   * The filter is described per polyphase component: the coefficient array
   * holds L*phaseLength entries in total. Both arrays are referenced by
   * pointer and are not part of this structure.
   */
  typedef struct
  {
    uint8_t L;                     /**< upsample factor. */
    uint16_t phaseLength;          /**< length of each polyphase filter component. */
    float32_t *pCoeffs;            /**< points to the coefficient array. The array is of length L*phaseLength. */
    float32_t *pState;             /**< points to the state variable array. The array is of length blockSize+phaseLength-1 (same rule as the Q15/Q31 instances; numTaps is not a field of this structure). */
  } csky_vdsp2_fir_interpolate_instance_f32;

  /**
   * @brief Prototype of the Q15 FIR interpolator processing routine.
   *
   * NOTE(review): presumably blockSize counts input samples and blockSize*L
   * output samples are written -- confirm against the implementation.
   *
   * @param S          interpolator instance (read-only pointer).
   * @param pSrc       input block (presumed).
   * @param pDst       output block (presumed).
   * @param blockSize  number of samples to process per call (presumed input count).
   */
  void csky_vdsp2_fir_interpolate_q15(
  const csky_vdsp2_fir_interpolate_instance_q15 * S,
  q15_t * pSrc,
  q15_t * pDst,
  uint32_t blockSize);

  /**
   * @brief Prototype of the Q15 FIR interpolator initialization routine.
   *
   * The caller passes the total tap count (numTaps) whereas the instance
   * stores phaseLength.
   * NOTE(review): presumably phaseLength = numTaps / L and an error status is
   * returned when numTaps is not a multiple of L -- confirm.
   *
   * @param S          interpolator instance (presumably filled in by this call).
   * @param L          upsample factor.
   * @param numTaps    total number of filter coefficients (presumed).
   * @param pCoeffs    coefficient array.
   * @param pState     state array of length blockSize+phaseLength-1.
   * @param blockSize  number of samples processed per call (presumed).
   * @return a csky_vdsp2_status value; the individual codes are defined elsewhere.
   */
  csky_vdsp2_status csky_vdsp2_fir_interpolate_init_q15(
  csky_vdsp2_fir_interpolate_instance_q15 * S,
  uint8_t L,
  uint16_t numTaps,
  q15_t * pCoeffs,
  q15_t * pState,
  uint32_t blockSize);

  /**
   * @brief Prototype of the Q31 FIR interpolator processing routine.
   *
   * NOTE(review): presumably blockSize counts input samples and blockSize*L
   * output samples are written -- confirm against the implementation.
   *
   * @param S          interpolator instance (read-only pointer).
   * @param pSrc       input block (presumed).
   * @param pDst       output block (presumed).
   * @param blockSize  number of samples to process per call (presumed input count).
   */
  void csky_vdsp2_fir_interpolate_q31(
  const csky_vdsp2_fir_interpolate_instance_q31 * S,
  q31_t * pSrc,
  q31_t * pDst,
  uint32_t blockSize);

  /**
   * @brief Prototype of the Q31 FIR interpolator initialization routine.
   *
   * The caller passes the total tap count (numTaps) whereas the instance
   * stores phaseLength.
   * NOTE(review): presumably phaseLength = numTaps / L and an error status is
   * returned when numTaps is not a multiple of L -- confirm.
   *
   * @param S          interpolator instance (presumably filled in by this call).
   * @param L          upsample factor.
   * @param numTaps    total number of filter coefficients (presumed).
   * @param pCoeffs    coefficient array.
   * @param pState     state array of length blockSize+phaseLength-1.
   * @param blockSize  number of samples processed per call (presumed).
   * @return a csky_vdsp2_status value; the individual codes are defined elsewhere.
   */
  csky_vdsp2_status csky_vdsp2_fir_interpolate_init_q31(
  csky_vdsp2_fir_interpolate_instance_q31 * S,
  uint8_t L,
  uint16_t numTaps,
  q31_t * pCoeffs,
  q31_t * pState,
  uint32_t blockSize);

  /**
   * @brief Prototype of the floating-point FIR interpolator processing routine.
   *
   * NOTE(review): presumably blockSize counts input samples and blockSize*L
   * output samples are written -- confirm against the implementation.
   *
   * @param S          interpolator instance (read-only pointer).
   * @param pSrc       input block (presumed).
   * @param pDst       output block (presumed).
   * @param blockSize  number of samples to process per call (presumed input count).
   */
  void csky_vdsp2_fir_interpolate_f32(
  const csky_vdsp2_fir_interpolate_instance_f32 * S,
  float32_t * pSrc,
  float32_t * pDst,
  uint32_t blockSize);

  /**
   * @brief Prototype of the floating-point FIR interpolator initialization routine.
   *
   * The caller passes the total tap count (numTaps) whereas the instance
   * stores phaseLength.
   * NOTE(review): presumably phaseLength = numTaps / L and an error status is
   * returned when numTaps is not a multiple of L -- confirm.
   *
   * @param S          interpolator instance (presumably filled in by this call).
   * @param L          upsample factor.
   * @param numTaps    total number of filter coefficients (presumed).
   * @param pCoeffs    coefficient array.
   * @param pState     state array (length TODO confirm; see the instance structure).
   * @param blockSize  number of samples processed per call (presumed).
   * @return a csky_vdsp2_status value; the individual codes are defined elsewhere.
   */
  csky_vdsp2_status csky_vdsp2_fir_interpolate_init_f32(
  csky_vdsp2_fir_interpolate_instance_f32 * S,
  uint8_t L,
  uint16_t numTaps,
  float32_t * pCoeffs,
  float32_t * pState,
  uint32_t blockSize);

  /**
   * @brief Instance structure for the floating-point transposed direct form II Biquad cascade filter.
   *
   * Single-channel variant: two state values and five coefficients per stage.
   * The arrays are referenced by pointer and are not part of this structure.
   */
  typedef struct
  {
    uint8_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
    float32_t *pState;         /**< points to the array of state coefficients.  The array is of length 2*numStages. */
    float32_t *pCoeffs;        /**< points to the array of coefficients.  The array is of length 5*numStages. */
  } csky_vdsp2_biquad_cascade_df2T_instance_f32;

  /**
   * @brief Instance structure for the stereo floating-point transposed direct form II Biquad cascade filter.
   *
   * Same layout as csky_vdsp2_biquad_cascade_df2T_instance_f32, but the state
   * array holds four values per stage instead of two; the coefficient array
   * still holds five values per stage.
   */
  typedef struct
  {
    uint8_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
    float32_t *pState;         /**< points to the array of state coefficients.  The array is of length 4*numStages. */
    float32_t *pCoeffs;        /**< points to the array of coefficients.  The array is of length 5*numStages. */
  } csky_vdsp2_biquad_cascade_stereo_df2T_instance_f32;

  /**
   * @brief Prototype of the floating-point transposed direct form II Biquad
   *        cascade processing routine.
   *
   * NOTE(review): buffer roles are inferred from the names -- confirm against
   * the implementation.
   *
   * @param S          filter instance (read-only pointer).
   * @param pSrc       input block (presumed).
   * @param pDst       output block (presumed).
   * @param blockSize  number of samples to process (presumed).
   */
  void csky_vdsp2_biquad_cascade_df2T_f32(
  const csky_vdsp2_biquad_cascade_df2T_instance_f32 * S,
  float32_t * pSrc,
  float32_t * pDst,
  uint32_t blockSize);

  /**
   * @brief Prototype of the stereo variant of the Biquad cascade processing routine.
   *
   * NOTE(review): the channel layout of pSrc/pDst (e.g. interleaved) and
   * whether blockSize counts samples per channel are not visible here -- confirm.
   */
  void csky_vdsp2_biquad_cascade_stereo_df2T_f32(
  const csky_vdsp2_biquad_cascade_stereo_df2T_instance_f32 * S,
  float32_t * pSrc,
  float32_t * pDst,
  uint32_t blockSize);

  /**
   * @brief Prototype of the Biquad cascade (df2T) initialization routine.
   *
   * Returns void, so no failure is reported to the caller.
   *
   * @param S          filter instance (presumably filled in by this call).
   * @param numStages  number of 2nd order stages.
   * @param pCoeffs    coefficient array of length 5*numStages.
   * @param pState     state array of length 2*numStages.
   */
  void csky_vdsp2_biquad_cascade_df2T_init_f32(
  csky_vdsp2_biquad_cascade_df2T_instance_f32 * S,
  uint8_t numStages,
  float32_t * pCoeffs,
  float32_t * pState);

  /**
   * @brief Prototype of the stereo Biquad cascade (df2T) initialization routine.
   *
   * Returns void, so no failure is reported to the caller.
   *
   * @param S          stereo filter instance (presumably filled in by this call).
   * @param numStages  number of 2nd order stages.
   * @param pCoeffs    coefficient array of length 5*numStages.
   * @param pState     state array of length 4*numStages.
   */
  void csky_vdsp2_biquad_cascade_stereo_df2T_init_f32(
  csky_vdsp2_biquad_cascade_stereo_df2T_instance_f32 * S,
  uint8_t numStages,
  float32_t * pCoeffs,
  float32_t * pState);

  /**
   * @brief Instance structure for the Q15 FIR lattice filter.
   *
   * State and coefficient arrays both hold numStages entries and are
   * referenced by pointer, not stored in this structure.
   */
  typedef struct
  {
    uint16_t numStages;                  /**< number of filter stages. */
    q15_t *pState;                       /**< points to the state variable array. The array is of length numStages. */
    q15_t *pCoeffs;                      /**< points to the coefficient array. The array is of length numStages. */
  } csky_vdsp2_fir_lattice_instance_q15;

  /**
   * @brief Instance structure for the Q31 FIR lattice filter.
   *
   * State and coefficient arrays both hold numStages entries and are
   * referenced by pointer, not stored in this structure.
   */
  typedef struct
  {
    uint16_t numStages;                  /**< number of filter stages. */
    q31_t *pState;                       /**< points to the state variable array. The array is of length numStages. */
    q31_t *pCoeffs;                      /**< points to the coefficient array. The array is of length numStages. */
  } csky_vdsp2_fir_lattice_instance_q31;

  /**
   * @brief Instance structure for the floating-point FIR lattice filter.
   *
   * State and coefficient arrays both hold numStages entries and are
   * referenced by pointer, not stored in this structure.
   */
  typedef struct
  {
    uint16_t numStages;                  /**< number of filter stages. */
    float32_t *pState;                   /**< points to the state variable array. The array is of length numStages. */
    float32_t *pCoeffs;                  /**< points to the coefficient array. The array is of length numStages. */
  } csky_vdsp2_fir_lattice_instance_f32;

  /**
   * @brief Prototype of the Q15 FIR lattice initialization routine.
   *
   * Returns void and takes no blockSize, consistent with the state array
   * length (numStages) being independent of the block size.
   *
   * @param S          filter instance (presumably filled in by this call).
   * @param numStages  number of filter stages.
   * @param pCoeffs    coefficient array of length numStages.
   * @param pState     state array of length numStages.
   */
  void csky_vdsp2_fir_lattice_init_q15(
  csky_vdsp2_fir_lattice_instance_q15 * S,
  uint16_t numStages,
  q15_t * pCoeffs,
  q15_t * pState);

  /**
   * @brief Prototype of the Q15 FIR lattice processing routine.
   *
   * NOTE(review): buffer roles are inferred from the names -- confirm.
   *
   * @param S          filter instance (read-only pointer).
   * @param pSrc       input block (presumed).
   * @param pDst       output block (presumed).
   * @param blockSize  number of samples to process (presumed).
   */
  void csky_vdsp2_fir_lattice_q15(
  const csky_vdsp2_fir_lattice_instance_q15 * S,
  q15_t * pSrc,
  q15_t * pDst,
  uint32_t blockSize);

  /**
   * @brief Prototype of the Q31 FIR lattice initialization routine.
   *
   * Returns void and takes no blockSize, consistent with the state array
   * length (numStages) being independent of the block size.
   *
   * @param S          filter instance (presumably filled in by this call).
   * @param numStages  number of filter stages.
   * @param pCoeffs    coefficient array of length numStages.
   * @param pState     state array of length numStages.
   */
  void csky_vdsp2_fir_lattice_init_q31(
  csky_vdsp2_fir_lattice_instance_q31 * S,
  uint16_t numStages,
  q31_t * pCoeffs,
  q31_t * pState);

  /**
   * @brief Prototype of the Q31 FIR lattice processing routine.
   *
   * NOTE(review): buffer roles are inferred from the names -- confirm.
   *
   * @param S          filter instance (read-only pointer).
   * @param pSrc       input block (presumed).
   * @param pDst       output block (presumed).
   * @param blockSize  number of samples to process (presumed).
   */
  void csky_vdsp2_fir_lattice_q31(
  const csky_vdsp2_fir_lattice_instance_q31 * S,
  q31_t * pSrc,
  q31_t * pDst,
  uint32_t blockSize);

  /**
   * @brief Prototype of the floating-point FIR lattice initialization routine.
   *
   * Returns void and takes no blockSize, consistent with the state array
   * length (numStages) being independent of the block size.
   *
   * @param S          filter instance (presumably filled in by this call).
   * @param numStages  number of filter stages.
   * @param pCoeffs    coefficient array of length numStages.
   * @param pState     state array of length numStages.
   */
  void csky_vdsp2_fir_lattice_init_f32(
  csky_vdsp2_fir_lattice_instance_f32 * S,
  uint16_t numStages,
  float32_t * pCoeffs,
  float32_t * pState);

  /**
   * @brief Prototype of the floating-point FIR lattice processing routine.
   *
   * NOTE(review): buffer roles are inferred from the names -- confirm.
   *
   * @param S          filter instance (read-only pointer).
   * @param pSrc       input block (presumed).
   * @param pDst       output block (presumed).
   * @param blockSize  number of samples to process (presumed).
   */
  void csky_vdsp2_fir_lattice_f32(
  const csky_vdsp2_fir_lattice_instance_f32 * S,
  float32_t * pSrc,
  float32_t * pDst,
  uint32_t blockSize);


  /**
   * @brief Instance structure for the Q15 IIR lattice filter.
   *
   * Two coefficient sets are referenced: numStages reflection coefficients
   * and numStages+1 ladder coefficients. All arrays are held by pointer.
   */
  typedef struct
  {
    uint16_t numStages;                  /**< number of stages in the filter. */
    q15_t *pState;                       /**< points to the state variable array. The array is of length numStages+blockSize. */
    q15_t *pkCoeffs;                     /**< points to the reflection coefficient array. The array is of length numStages. */
    q15_t *pvCoeffs;                     /**< points to the ladder coefficient array. The array is of length numStages+1. */
  } csky_vdsp2_iir_lattice_instance_q15;

  /**
   * @brief Instance structure for the Q31 IIR lattice filter.
   *
   * Two coefficient sets are referenced: numStages reflection coefficients
   * and numStages+1 ladder coefficients. All arrays are held by pointer.
   */
  typedef struct
  {
    uint16_t numStages;                  /**< number of stages in the filter. */
    q31_t *pState;                       /**< points to the state variable array. The array is of length numStages+blockSize. */
    q31_t *pkCoeffs;                     /**< points to the reflection coefficient array. The array is of length numStages. */
    q31_t *pvCoeffs;                     /**< points to the ladder coefficient array. The array is of length numStages+1. */
  } csky_vdsp2_iir_lattice_instance_q31;

  /**
   * @brief Instance structure for the floating-point IIR lattice filter.
   *
   * Two coefficient sets are referenced: numStages reflection coefficients
   * and numStages+1 ladder coefficients. All arrays are held by pointer.
   */
  typedef struct
  {
    uint16_t numStages;                  /**< number of stages in the filter. */
    float32_t *pState;                   /**< points to the state variable array. The array is of length numStages+blockSize. */
    float32_t *pkCoeffs;                 /**< points to the reflection coefficient array. The array is of length numStages. */
    float32_t *pvCoeffs;                 /**< points to the ladder coefficient array. The array is of length numStages+1. */
  } csky_vdsp2_iir_lattice_instance_f32;

  /**
   * @brief Prototype of the floating-point IIR lattice processing routine.
   *
   * NOTE(review): buffer roles are inferred from the names -- confirm.
   *
   * @param S          filter instance (read-only pointer).
   * @param pSrc       input block (presumed).
   * @param pDst       output block (presumed).
   * @param blockSize  number of samples to process (presumed).
   */
  void csky_vdsp2_iir_lattice_f32(
  const csky_vdsp2_iir_lattice_instance_f32 * S,
  float32_t * pSrc,
  float32_t * pDst,
  uint32_t blockSize);

  /**
   * @brief Prototype of the floating-point IIR lattice initialization routine.
   *
   * Returns void, so no failure is reported to the caller.
   *
   * @param S          filter instance (presumably filled in by this call).
   * @param numStages  number of stages in the filter.
   * @param pkCoeffs   reflection coefficient array of length numStages.
   * @param pvCoeffs   ladder coefficient array of length numStages+1.
   * @param pState     state array of length numStages+blockSize.
   * @param blockSize  number of samples processed per call (presumed).
   */
  void csky_vdsp2_iir_lattice_init_f32(
  csky_vdsp2_iir_lattice_instance_f32 * S,
  uint16_t numStages,
  float32_t * pkCoeffs,
  float32_t * pvCoeffs,
  float32_t * pState,
  uint32_t blockSize);

  /**
   * @brief Prototype of the Q31 IIR lattice processing routine.
   *
   * NOTE(review): buffer roles are inferred from the names -- confirm.
   *
   * @param S          filter instance (read-only pointer).
   * @param pSrc       input block (presumed).
   * @param pDst       output block (presumed).
   * @param blockSize  number of samples to process (presumed).
   */
  void csky_vdsp2_iir_lattice_q31(
  const csky_vdsp2_iir_lattice_instance_q31 * S,
  q31_t * pSrc,
  q31_t * pDst,
  uint32_t blockSize);

  /**
   * @brief Prototype of the Q31 IIR lattice initialization routine.
   *
   * Returns void, so no failure is reported to the caller.
   *
   * @param S          filter instance (presumably filled in by this call).
   * @param numStages  number of stages in the filter.
   * @param pkCoeffs   reflection coefficient array of length numStages.
   * @param pvCoeffs   ladder coefficient array of length numStages+1.
   * @param pState     state array of length numStages+blockSize.
   * @param blockSize  number of samples processed per call (presumed).
   */
  void csky_vdsp2_iir_lattice_init_q31(
  csky_vdsp2_iir_lattice_instance_q31 * S,
  uint16_t numStages,
  q31_t * pkCoeffs,
  q31_t * pvCoeffs,
  q31_t * pState,
  uint32_t blockSize);

  /**
   * @brief Prototype of the Q15 IIR lattice processing routine.
   *
   * NOTE(review): buffer roles are inferred from the names -- confirm.
   *
   * @param S          filter instance (read-only pointer).
   * @param pSrc       input block (presumed).
   * @param pDst       output block (presumed).
   * @param blockSize  number of samples to process (presumed).
   */
  void csky_vdsp2_iir_lattice_q15(
  const csky_vdsp2_iir_lattice_instance_q15 * S,
  q15_t * pSrc,
  q15_t * pDst,
  uint32_t blockSize);

  /**
   * @brief Prototype of the Q15 IIR lattice initialization routine.
   *
   * Returns void, so no failure is reported to the caller.
   *
   * @param S          filter instance (presumably filled in by this call).
   * @param numStages  number of stages in the filter.
   * @param pkCoeffs   reflection coefficient array of length numStages.
   * @param pvCoeffs   ladder coefficient array of length numStages+1.
   * @param pState     state array of length numStages+blockSize.
   * @param blockSize  number of samples processed per call (presumed).
   */
  void csky_vdsp2_iir_lattice_init_q15(
  csky_vdsp2_iir_lattice_instance_q15 * S,
  uint16_t numStages,
  q15_t * pkCoeffs,
  q15_t * pvCoeffs,
  q15_t * pState,
  uint32_t blockSize);


  /**
   * @brief Instance structure for the floating-point LMS filter.
   *
   * Unlike the Q15/Q31 LMS instances this one has no postShift field. State
   * and coefficient arrays are referenced by pointer.
   */
  typedef struct
  {
    uint16_t numTaps;    /**< number of coefficients in the filter. */
    float32_t *pState;   /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
    float32_t *pCoeffs;  /**< points to the coefficient array. The array is of length numTaps. */
    float32_t mu;        /**< step size that controls filter coefficient updates. */
  } csky_vdsp2_lms_instance_f32;

  /**
   * @brief Prototype of the floating-point LMS filter processing routine.
   *
   * The instance pointer is const-qualified; the coefficient and state arrays
   * are reached through non-const pointers inside the instance.
   * NOTE(review): buffer roles are inferred from the names -- confirm.
   *
   * @param S          LMS instance (read-only pointer).
   * @param pSrc       input block (presumed).
   * @param pRef       reference (desired) signal block (presumed).
   * @param pOut       filter output block (presumed).
   * @param pErr       error signal block (presumed).
   * @param blockSize  number of samples to process (presumed).
   */
  void csky_vdsp2_lms_f32(
  const csky_vdsp2_lms_instance_f32 * S,
  float32_t * pSrc,
  float32_t * pRef,
  float32_t * pOut,
  float32_t * pErr,
  uint32_t blockSize);

  /**
   * @brief Prototype of the floating-point LMS filter initialization routine.
   *
   * Returns void, so no failure is reported to the caller.
   *
   * @param S          LMS instance (presumably filled in by this call).
   * @param numTaps    number of coefficients in the filter.
   * @param pCoeffs    coefficient array of length numTaps.
   * @param pState     state array of length numTaps+blockSize-1.
   * @param mu         step size that controls filter coefficient updates.
   * @param blockSize  number of samples processed per call (presumed).
   */
  void csky_vdsp2_lms_init_f32(
  csky_vdsp2_lms_instance_f32 * S,
  uint16_t numTaps,
  float32_t * pCoeffs,
  float32_t * pState,
  float32_t mu,
  uint32_t blockSize);


  /**
   * @brief Instance structure for the Q15 LMS filter.
   *
   * Adds a 32-bit postShift to the fields of the floating-point LMS instance.
   * State and coefficient arrays are referenced by pointer.
   */
  typedef struct
  {
    uint16_t numTaps;    /**< number of coefficients in the filter. */
    q15_t *pState;       /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
    q15_t *pCoeffs;      /**< points to the coefficient array. The array is of length numTaps. */
    q15_t mu;            /**< step size that controls filter coefficient updates. */
    uint32_t postShift;  /**< bit shift applied to coefficients. */
  } csky_vdsp2_lms_instance_q15;

  /**
   * @brief Prototype of the Q15 LMS filter initialization routine.
   *
   * Returns void, so no failure is reported to the caller.
   *
   * @param S          LMS instance (presumably filled in by this call).
   * @param numTaps    number of coefficients in the filter.
   * @param pCoeffs    coefficient array of length numTaps.
   * @param pState     state array of length numTaps+blockSize-1.
   * @param mu         step size that controls filter coefficient updates.
   * @param blockSize  number of samples processed per call (presumed).
   * @param postShift  bit shift applied to coefficients.
   */
  void csky_vdsp2_lms_init_q15(
  csky_vdsp2_lms_instance_q15 * S,
  uint16_t numTaps,
  q15_t * pCoeffs,
  q15_t * pState,
  q15_t mu,
  uint32_t blockSize,
  uint32_t postShift);

  /**
   * @brief Prototype of the Q15 LMS filter processing routine.
   *
   * NOTE(review): buffer roles are inferred from the names -- confirm.
   *
   * @param S          LMS instance (read-only pointer).
   * @param pSrc       input block (presumed).
   * @param pRef       reference (desired) signal block (presumed).
   * @param pOut       filter output block (presumed).
   * @param pErr       error signal block (presumed).
   * @param blockSize  number of samples to process (presumed).
   */
  void csky_vdsp2_lms_q15(
  const csky_vdsp2_lms_instance_q15 * S,
  q15_t * pSrc,
  q15_t * pRef,
  q15_t * pOut,
  q15_t * pErr,
  uint32_t blockSize);


  /**
   * @brief Instance structure for the Q31 LMS filter.
   *
   * Adds a 32-bit postShift to the fields of the floating-point LMS instance.
   * State and coefficient arrays are referenced by pointer.
   */
  typedef struct
  {
    uint16_t numTaps;    /**< number of coefficients in the filter. */
    q31_t *pState;       /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
    q31_t *pCoeffs;      /**< points to the coefficient array. The array is of length numTaps. */
    q31_t mu;            /**< step size that controls filter coefficient updates. */
    uint32_t postShift;  /**< bit shift applied to coefficients. */
  } csky_vdsp2_lms_instance_q31;

  /**
   * @brief Prototype of the Q31 LMS filter processing routine.
   *
   * NOTE(review): buffer roles are inferred from the names -- confirm.
   *
   * @param S          LMS instance (read-only pointer).
   * @param pSrc       input block (presumed).
   * @param pRef       reference (desired) signal block (presumed).
   * @param pOut       filter output block (presumed).
   * @param pErr       error signal block (presumed).
   * @param blockSize  number of samples to process (presumed).
   */
  void csky_vdsp2_lms_q31(
  const csky_vdsp2_lms_instance_q31 * S,
  q31_t * pSrc,
  q31_t * pRef,
  q31_t * pOut,
  q31_t * pErr,
  uint32_t blockSize);

  /**
   * @brief Prototype of the Q31 LMS filter initialization routine.
   *
   * Returns void, so no failure is reported to the caller.
   *
   * @param S          LMS instance (presumably filled in by this call).
   * @param numTaps    number of coefficients in the filter.
   * @param pCoeffs    coefficient array of length numTaps.
   * @param pState     state array of length numTaps+blockSize-1.
   * @param mu         step size that controls filter coefficient updates.
   * @param blockSize  number of samples processed per call (presumed).
   * @param postShift  bit shift applied to coefficients.
   */
  void csky_vdsp2_lms_init_q31(
  csky_vdsp2_lms_instance_q31 * S,
  uint16_t numTaps,
  q31_t * pCoeffs,
  q31_t * pState,
  q31_t mu,
  uint32_t blockSize,
  uint32_t postShift);


  /**
   * @brief Instance structure for the floating-point normalized LMS filter.
   *
   * In addition to the plain LMS fields this keeps running values (energy, x0)
   * that carry over between calls.
   */
  typedef struct
  {
    uint16_t numTaps;     /**< number of coefficients in the filter. */
    float32_t *pState;    /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
    float32_t *pCoeffs;   /**< points to the coefficient array. The array is of length numTaps. */
    float32_t mu;         /**< step size that controls filter coefficient updates. */
    float32_t energy;     /**< saves previous frame energy. */
    float32_t x0;         /**< saves previous input sample. */
  } csky_vdsp2_lms_norm_instance_f32;

  /**
   * @brief Prototype of the floating-point normalized LMS processing routine.
   *
   * The instance pointer is not const-qualified, unlike csky_vdsp2_lms_f32;
   * the instance carries energy and x0 fields documented as saved values.
   * NOTE(review): buffer roles are inferred from the names -- confirm.
   *
   * @param S          normalized LMS instance.
   * @param pSrc       input block (presumed).
   * @param pRef       reference (desired) signal block (presumed).
   * @param pOut       filter output block (presumed).
   * @param pErr       error signal block (presumed).
   * @param blockSize  number of samples to process (presumed).
   */
  void csky_vdsp2_lms_norm_f32(
  csky_vdsp2_lms_norm_instance_f32 * S,
  float32_t * pSrc,
  float32_t * pRef,
  float32_t * pOut,
  float32_t * pErr,
  uint32_t blockSize);

  /**
   * @brief Prototype of the floating-point normalized LMS initialization routine.
   *
   * Returns void, so no failure is reported to the caller.
   *
   * @param S          normalized LMS instance (presumably filled in by this call).
   * @param numTaps    number of coefficients in the filter.
   * @param pCoeffs    coefficient array of length numTaps.
   * @param pState     state array of length numTaps+blockSize-1.
   * @param mu         step size that controls filter coefficient updates.
   * @param blockSize  number of samples processed per call (presumed).
   */
  void csky_vdsp2_lms_norm_init_f32(
  csky_vdsp2_lms_norm_instance_f32 * S,
  uint16_t numTaps,
  float32_t * pCoeffs,
  float32_t * pState,
  float32_t mu,
  uint32_t blockSize);


  /**
   * @brief Instance structure for the Q31 normalized LMS filter.
   *
   * Compared with the Q31 LMS instance, postShift is 8 bits wide here (32 bits
   * there), and a reciprocal table pointer plus the saved energy/x0 values are
   * added.
   */
  typedef struct
  {
    uint16_t numTaps;     /**< number of coefficients in the filter. */
    q31_t *pState;        /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
    q31_t *pCoeffs;       /**< points to the coefficient array. The array is of length numTaps. */
    q31_t mu;             /**< step size that controls filter coefficient updates. */
    uint8_t postShift;    /**< bit shift applied to coefficients. */
    q31_t *recipTable;    /**< points to the reciprocal initial value table. */
    q31_t energy;         /**< saves previous frame energy. */
    q31_t x0;             /**< saves previous input sample. */
  } csky_vdsp2_lms_norm_instance_q31;

  /**
   * @brief Prototype of the Q31 normalized LMS processing routine.
   *
   * The instance pointer is not const-qualified; the instance carries energy
   * and x0 fields documented as saved values.
   * NOTE(review): buffer roles are inferred from the names -- confirm.
   *
   * @param S          normalized LMS instance.
   * @param pSrc       input block (presumed).
   * @param pRef       reference (desired) signal block (presumed).
   * @param pOut       filter output block (presumed).
   * @param pErr       error signal block (presumed).
   * @param blockSize  number of samples to process (presumed).
   */
  void csky_vdsp2_lms_norm_q31(
  csky_vdsp2_lms_norm_instance_q31 * S,
  q31_t * pSrc,
  q31_t * pRef,
  q31_t * pOut,
  q31_t * pErr,
  uint32_t blockSize);

  /**
   * @brief Prototype of the Q31 normalized LMS initialization routine.
   *
   * Returns void. No recipTable argument is taken, so the instance's table
   * pointer is presumably set internally -- TODO confirm.
   *
   * @param S          normalized LMS instance (presumably filled in by this call).
   * @param numTaps    number of coefficients in the filter.
   * @param pCoeffs    coefficient array of length numTaps.
   * @param pState     state array of length numTaps+blockSize-1.
   * @param mu         step size that controls filter coefficient updates.
   * @param blockSize  number of samples processed per call (presumed).
   * @param postShift  bit shift applied to coefficients (8-bit).
   */
  void csky_vdsp2_lms_norm_init_q31(
  csky_vdsp2_lms_norm_instance_q31 * S,
  uint16_t numTaps,
  q31_t * pCoeffs,
  q31_t * pState,
  q31_t mu,
  uint32_t blockSize,
  uint8_t postShift);


  /**
   * @brief Instance structure for the Q15 normalized LMS filter.
   *
   * Compared with the Q15 LMS instance, postShift is 8 bits wide here (32 bits
   * there), and a reciprocal table pointer plus the saved energy/x0 values are
   * added.
   */
  typedef struct
  {
    uint16_t numTaps;     /**< number of coefficients in the filter. */
    q15_t *pState;        /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
    q15_t *pCoeffs;       /**< points to the coefficient array. The array is of length numTaps. */
    q15_t mu;             /**< step size that controls filter coefficient updates. */
    uint8_t postShift;    /**< bit shift applied to coefficients. */
    q15_t *recipTable;    /**< points to the reciprocal initial value table. */
    q15_t energy;         /**< saves previous frame energy. */
    q15_t x0;             /**< saves previous input sample. */
  } csky_vdsp2_lms_norm_instance_q15;

  /**
   * @brief Prototype of the Q15 normalized LMS processing routine.
   *
   * The instance pointer is not const-qualified; the instance carries energy
   * and x0 fields documented as saved values.
   * NOTE(review): buffer roles are inferred from the names -- confirm.
   *
   * @param S          normalized LMS instance.
   * @param pSrc       input block (presumed).
   * @param pRef       reference (desired) signal block (presumed).
   * @param pOut       filter output block (presumed).
   * @param pErr       error signal block (presumed).
   * @param blockSize  number of samples to process (presumed).
   */
  void csky_vdsp2_lms_norm_q15(
  csky_vdsp2_lms_norm_instance_q15 * S,
  q15_t * pSrc,
  q15_t * pRef,
  q15_t * pOut,
  q15_t * pErr,
  uint32_t blockSize);

  /**
   * @brief Prototype of the Q15 normalized LMS initialization routine.
   *
   * Returns void. No recipTable argument is taken, so the instance's table
   * pointer is presumably set internally -- TODO confirm.
   *
   * @param S          normalized LMS instance (presumably filled in by this call).
   * @param numTaps    number of coefficients in the filter.
   * @param pCoeffs    coefficient array of length numTaps.
   * @param pState     state array of length numTaps+blockSize-1.
   * @param mu         step size that controls filter coefficient updates.
   * @param blockSize  number of samples processed per call (presumed).
   * @param postShift  bit shift applied to coefficients (8-bit).
   */
  void csky_vdsp2_lms_norm_init_q15(
  csky_vdsp2_lms_norm_instance_q15 * S,
  uint16_t numTaps,
  q15_t * pCoeffs,
  q15_t * pState,
  q15_t mu,
  uint32_t blockSize,
  uint8_t postShift);

  /**
   * @brief Prototype of the floating-point correlation routine.
   *
   * NOTE(review): presumably cross-correlates the two input sequences; the
   * required length and layout of pDst are not stated in this header --
   * confirm against the implementation.
   *
   * @param pSrcA    first input sequence (presumed).
   * @param srcALen  length of the first input sequence.
   * @param pSrcB    second input sequence (presumed).
   * @param srcBLen  length of the second input sequence.
   * @param pDst     output buffer (presumed).
   */
  void csky_vdsp2_correlate_f32(
  float32_t * pSrcA,
  uint32_t srcALen,
  float32_t * pSrcB,
  uint32_t srcBLen,
  float32_t * pDst);

  /**
   * @brief Prototypes of the Q15 correlation routines: opt, plain, fast and fast_opt.
   *
   * The opt and fast_opt variants take a single caller-provided q15_t scratch
   * buffer (the Q15 convolution opt variants take two).
   * NOTE(review): the trade-off implied by "fast"/"opt", the scratch size and
   * the output length are not visible here -- confirm against the
   * implementation.
   *
   * @param pSrcA     first input sequence (presumed).
   * @param srcALen   length of the first input sequence.
   * @param pSrcB     second input sequence (presumed).
   * @param srcBLen   length of the second input sequence.
   * @param pDst      output buffer (presumed).
   * @param pScratch  scratch buffer (opt variants only; size TODO confirm).
   */
  void csky_vdsp2_correlate_opt_q15(
  q15_t * pSrcA,
  uint32_t srcALen,
  q15_t * pSrcB,
  uint32_t srcBLen,
  q15_t * pDst,
  q15_t * pScratch);

  void csky_vdsp2_correlate_q15(
  q15_t * pSrcA,
  uint32_t srcALen,
  q15_t * pSrcB,
  uint32_t srcBLen,
  q15_t * pDst);

  void csky_vdsp2_correlate_fast_q15(
  q15_t * pSrcA,
  uint32_t srcALen,
  q15_t * pSrcB,
  uint32_t srcBLen,
  q15_t * pDst);

  void csky_vdsp2_correlate_fast_opt_q15(
  q15_t * pSrcA,
  uint32_t srcALen,
  q15_t * pSrcB,
  uint32_t srcBLen,
  q15_t * pDst,
  q15_t * pScratch);

  /**
   * @brief Prototypes of the Q31 correlation routines: plain and fast.
   *
   * Both share one parameter layout.
   * NOTE(review): how the fast variant differs (precision, scaling) and the
   * required output length are not visible here -- confirm against the
   * implementation.
   *
   * @param pSrcA    first input sequence (presumed).
   * @param srcALen  length of the first input sequence.
   * @param pSrcB    second input sequence (presumed).
   * @param srcBLen  length of the second input sequence.
   * @param pDst     output buffer (presumed).
   */
  void csky_vdsp2_correlate_q31(
  q31_t * pSrcA,
  uint32_t srcALen,
  q31_t * pSrcB,
  uint32_t srcBLen,
  q31_t * pDst);

  void csky_vdsp2_correlate_fast_q31(
  q31_t * pSrcA,
  uint32_t srcALen,
  q31_t * pSrcB,
  uint32_t srcBLen,
  q31_t * pDst);

  /**
   * @brief Prototypes of the Q7 correlation routines: opt and plain.
   *
   * The opt variant takes two caller-provided scratch buffers whose element
   * type is q15_t, i.e. wider than the q7_t data.
   * NOTE(review): required scratch sizes and output length are not visible
   * here -- confirm against the implementation.
   *
   * @param pSrcA      first input sequence (presumed).
   * @param srcALen    length of the first input sequence.
   * @param pSrcB      second input sequence (presumed).
   * @param srcBLen    length of the second input sequence.
   * @param pDst       output buffer (presumed).
   * @param pScratch1  q15_t scratch buffer (opt variant only; size TODO confirm).
   * @param pScratch2  q15_t scratch buffer (opt variant only; size TODO confirm).
   */
  void csky_vdsp2_correlate_opt_q7(
  q7_t * pSrcA,
  uint32_t srcALen,
  q7_t * pSrcB,
  uint32_t srcBLen,
  q7_t * pDst,
  q15_t * pScratch1,
  q15_t * pScratch2);

  void csky_vdsp2_correlate_q7(
  q7_t * pSrcA,
  uint32_t srcALen,
  q7_t * pSrcB,
  uint32_t srcBLen,
  q7_t * pDst);


  /**
   * @brief Instance structure for the floating-point sparse FIR filter.
   *
   * Each of the numTaps coefficients has its own delay taken from pTapDelay.
   * Delay entries are int32_t while maxDelay and stateIndex are 16-bit
   * unsigned. All arrays are referenced by pointer.
   */
  typedef struct
  {
    uint16_t numTaps;             /**< number of coefficients in the filter. */
    uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
    float32_t *pState;            /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
    float32_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps.*/
    uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
    int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
  } csky_vdsp2_fir_sparse_instance_f32;

  /**
   * @brief Instance structure for the Q31 sparse FIR filter.
   *
   * Passed by pointer as the first argument of
   * csky_vdsp2_fir_sparse_init_q31() and csky_vdsp2_fir_sparse_q31().
   * The structure stores pointers only; the state, coefficient and tap-delay
   * arrays are separate buffers.
   *
   * NOTE(review): the pState comment gives the length as
   * maxDelay+blockSize-1, whereas CMSIS-DSP's documentation for the
   * equivalent init function asks for maxDelay+blockSize elements — confirm
   * which one this port requires before sizing the buffer.
   */
  typedef struct
  {
    uint16_t numTaps;             /**< number of coefficients in the filter. */
    uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
    q31_t *pState;                /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
    q31_t *pCoeffs;               /**< points to the coefficient array. The array is of length numTaps. */
    uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
    int32_t *pTapDelay;           /**< points to the array of delay values (signed 32-bit entries).  The array is of length numTaps. */
  } csky_vdsp2_fir_sparse_instance_q31;

  /**
   * @brief Instance structure for the Q15 sparse FIR filter.
   *
   * Passed by pointer as the first argument of
   * csky_vdsp2_fir_sparse_init_q15() and csky_vdsp2_fir_sparse_q15().
   * The structure stores pointers only; the state, coefficient and tap-delay
   * arrays are separate buffers.
   *
   * NOTE(review): the pState comment gives the length as
   * maxDelay+blockSize-1, whereas CMSIS-DSP's documentation for the
   * equivalent init function asks for maxDelay+blockSize elements — confirm
   * which one this port requires before sizing the buffer.
   */
  typedef struct
  {
    uint16_t numTaps;             /**< number of coefficients in the filter. */
    uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
    q15_t *pState;                /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
    q15_t *pCoeffs;               /**< points to the coefficient array. The array is of length numTaps. */
    uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
    int32_t *pTapDelay;           /**< points to the array of delay values (signed 32-bit entries).  The array is of length numTaps. */
  } csky_vdsp2_fir_sparse_instance_q15;

  /**
   * @brief Instance structure for the Q7 sparse FIR filter.
   *
   * Passed by pointer as the first argument of
   * csky_vdsp2_fir_sparse_init_q7() and csky_vdsp2_fir_sparse_q7().
   * The structure stores pointers only; the state, coefficient and tap-delay
   * arrays are separate buffers.
   *
   * NOTE(review): the pState comment gives the length as
   * maxDelay+blockSize-1, whereas CMSIS-DSP's documentation for the
   * equivalent init function asks for maxDelay+blockSize elements — confirm
   * which one this port requires before sizing the buffer.
   */
  typedef struct
  {
    uint16_t numTaps;             /**< number of coefficients in the filter. */
    uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
    q7_t *pState;                 /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
    q7_t *pCoeffs;                /**< points to the coefficient array. The array is of length numTaps. */
    uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
    int32_t *pTapDelay;           /**< points to the array of delay values (signed 32-bit entries).  The array is of length numTaps. */
  } csky_vdsp2_fir_sparse_instance_q7;

  /**
   * @brief Processing prototype for the floating-point sparse FIR filter.
   *
   * @param S           filter instance (csky_vdsp2_fir_sparse_instance_f32).
   * @param pSrc        float32_t input buffer.
   * @param pDst        float32_t output buffer.
   * @param pScratchIn  caller-supplied float32_t scratch buffer
   *                    — required length not stated here, TODO confirm.
   * @param blockSize   presumably the number of samples processed per call.
   */
  void csky_vdsp2_fir_sparse_f32(
  csky_vdsp2_fir_sparse_instance_f32 * S,
  float32_t * pSrc,
  float32_t * pDst,
  float32_t * pScratchIn,
  uint32_t blockSize);

  /**
   * @brief Initialisation prototype for the floating-point sparse FIR filter.
   *
   * The parameter names match the fields of
   * csky_vdsp2_fir_sparse_instance_f32 (numTaps, pCoeffs, pState, pTapDelay,
   * maxDelay); see that structure for the documented array lengths.
   * NOTE(review): whether the state buffer is cleared by this call is not
   * visible from the header — confirm.
   */
  void csky_vdsp2_fir_sparse_init_f32(
  csky_vdsp2_fir_sparse_instance_f32 * S,
  uint16_t numTaps,
  float32_t * pCoeffs,
  float32_t * pState,
  int32_t * pTapDelay,
  uint16_t maxDelay,
  uint32_t blockSize);

  /**
   * @brief Processing prototype for the Q31 sparse FIR filter.
   *
   * @param S           filter instance (csky_vdsp2_fir_sparse_instance_q31).
   * @param pSrc        q31_t input buffer.
   * @param pDst        q31_t output buffer.
   * @param pScratchIn  caller-supplied q31_t scratch buffer
   *                    — required length not stated here, TODO confirm.
   * @param blockSize   presumably the number of samples processed per call.
   */
  void csky_vdsp2_fir_sparse_q31(
  csky_vdsp2_fir_sparse_instance_q31 * S,
  q31_t * pSrc,
  q31_t * pDst,
  q31_t * pScratchIn,
  uint32_t blockSize);

  /**
   * @brief Initialisation prototype for the Q31 sparse FIR filter.
   *
   * The parameter names match the fields of
   * csky_vdsp2_fir_sparse_instance_q31 (numTaps, pCoeffs, pState, pTapDelay,
   * maxDelay); see that structure for the documented array lengths.
   * NOTE(review): whether the state buffer is cleared by this call is not
   * visible from the header — confirm.
   */
  void csky_vdsp2_fir_sparse_init_q31(
  csky_vdsp2_fir_sparse_instance_q31 * S,
  uint16_t numTaps,
  q31_t * pCoeffs,
  q31_t * pState,
  int32_t * pTapDelay,
  uint16_t maxDelay,
  uint32_t blockSize);

  /**
   * @brief Processing prototype for the Q15 sparse FIR filter.
   *
   * Unlike the f32 and q31 variants, this one takes a second scratch buffer,
   * and that buffer is q31_t rather than q15_t.
   *
   * @param S            filter instance (csky_vdsp2_fir_sparse_instance_q15).
   * @param pSrc         q15_t input buffer.
   * @param pDst         q15_t output buffer.
   * @param pScratchIn   caller-supplied q15_t scratch buffer.
   * @param pScratchOut  caller-supplied q31_t scratch buffer.
   * @param blockSize    presumably the number of samples processed per call.
   *
   * NOTE(review): required scratch lengths are not stated here — confirm.
   */
  void csky_vdsp2_fir_sparse_q15(
  csky_vdsp2_fir_sparse_instance_q15 * S,
  q15_t * pSrc,
  q15_t * pDst,
  q15_t * pScratchIn,
  q31_t * pScratchOut,
  uint32_t blockSize);

  /**
   * @brief Initialisation prototype for the Q15 sparse FIR filter.
   *
   * The parameter names match the fields of
   * csky_vdsp2_fir_sparse_instance_q15 (numTaps, pCoeffs, pState, pTapDelay,
   * maxDelay); see that structure for the documented array lengths.
   * NOTE(review): whether the state buffer is cleared by this call is not
   * visible from the header — confirm.
   */
  void csky_vdsp2_fir_sparse_init_q15(
  csky_vdsp2_fir_sparse_instance_q15 * S,
  uint16_t numTaps,
  q15_t * pCoeffs,
  q15_t * pState,
  int32_t * pTapDelay,
  uint16_t maxDelay,
  uint32_t blockSize);

  /**
   * @brief Processing prototype for the Q7 sparse FIR filter.
   *
   * Unlike the f32 and q31 variants, this one takes a second scratch buffer,
   * and that buffer is q31_t rather than q7_t.
   *
   * @param S            filter instance (csky_vdsp2_fir_sparse_instance_q7).
   * @param pSrc         q7_t input buffer.
   * @param pDst         q7_t output buffer.
   * @param pScratchIn   caller-supplied q7_t scratch buffer.
   * @param pScratchOut  caller-supplied q31_t scratch buffer.
   * @param blockSize    presumably the number of samples processed per call.
   *
   * NOTE(review): required scratch lengths are not stated here — confirm.
   */
  void csky_vdsp2_fir_sparse_q7(
  csky_vdsp2_fir_sparse_instance_q7 * S,
  q7_t * pSrc,
  q7_t * pDst,
  q7_t * pScratchIn,
  q31_t * pScratchOut,
  uint32_t blockSize);

  /**
   * @brief Initialisation prototype for the Q7 sparse FIR filter.
   *
   * The parameter names match the fields of
   * csky_vdsp2_fir_sparse_instance_q7 (numTaps, pCoeffs, pState, pTapDelay,
   * maxDelay); see that structure for the documented array lengths.
   * NOTE(review): whether the state buffer is cleared by this call is not
   * visible from the header — confirm.
   */
  void csky_vdsp2_fir_sparse_init_q7(
  csky_vdsp2_fir_sparse_instance_q7 * S,
  uint16_t numTaps,
  q7_t * pCoeffs,
  q7_t * pState,
  int32_t * pTapDelay,
  uint16_t maxDelay,
  uint32_t blockSize);

  /**
   * @brief Sine/cosine prototype, float32_t variant.
   *
   * Takes one angle by value and returns two results through pointers.
   * NOTE(review): the unit of theta (degrees vs. radians) is not stated in
   * this header; the CMSIS-DSP function of the same name uses degrees —
   * confirm for this port.
   *
   * @param theta    input angle.
   * @param pSinVal  receives the sine result.
   * @param pCosVal  receives the cosine result.
   */
  void csky_vdsp2_sin_cos_f32(
  float32_t theta,
  float32_t * pSinVal,
  float32_t * pCosVal);

  /**
   * @brief Sine/cosine prototype, q31_t variant.
   *
   * NOTE(review): how the q31_t range of theta maps to an angle is not stated
   * in this header — confirm against the implementation.
   *
   * @param theta    input angle as a q31_t value.
   * @param pSinVal  receives the sine result.
   * @param pCosVal  receives the cosine result.
   */
  void csky_vdsp2_sin_cos_q31(
  q31_t theta,
  q31_t * pSinVal,
  q31_t * pCosVal);

  /*
   * Complex-conjugate prototypes: one source buffer, one destination buffer
   * of the same element type, and a sample count.
   *
   * NOTE(review): presumably the buffers hold interleaved real/imaginary
   * pairs and numSamples counts complex samples (so 2 * numSamples elements
   * per buffer) — the layout is not stated in this header, confirm.
   */

  /** @brief float32_t variant. */
  void csky_vdsp2_cmplx_conj_f32(
  float32_t * pSrc,
  float32_t * pDst,
  uint32_t numSamples);

  /** @brief q31_t variant. */
  void csky_vdsp2_cmplx_conj_q31(
  q31_t * pSrc,
  q31_t * pDst,
  uint32_t numSamples);

  /** @brief q15_t variant. */
  void csky_vdsp2_cmplx_conj_q15(
  q15_t * pSrc,
  q15_t * pDst,
  uint32_t numSamples);

  /*
   * Complex magnitude-squared prototypes: one source buffer, one destination
   * buffer and a sample count.
   *
   * NOTE(review): presumably pSrc holds interleaved real/imaginary pairs and
   * pDst receives one real value per complex sample — not stated in this
   * header, confirm.  Output scaling of the fixed-point variants is likewise
   * not visible here.
   */

  /** @brief float32_t variant. */
  void csky_vdsp2_cmplx_mag_squared_f32(
  float32_t * pSrc,
  float32_t * pDst,
  uint32_t numSamples);

  /** @brief q31_t variant with q31_t output. */
  void csky_vdsp2_cmplx_mag_squared_q31(
  q31_t * pSrc,
  q31_t * pDst,
  uint32_t numSamples);

  /**
   * @brief q31_t variant whose output buffer is q63_t.
   *
   * This is the only variant in the family whose destination type is wider
   * than its source type.
   */
  void csky_vdsp2_cmplx_mag_squared_q31_basic(
  q31_t * pSrc,
  q63_t * pDst,
  uint32_t numSamples);

  /** @brief q15_t variant. */
  void csky_vdsp2_cmplx_mag_squared_q15(
  q15_t * pSrc,
  q15_t * pDst,
  uint32_t numSamples);

  /*
   * Vector square-root prototypes: one source buffer, one destination buffer
   * of the same element type, and an element count.  Unlike the scalar
   * csky_vdsp2_sqrt_* functions these return void, so no status is reported.
   *
   * NOTE(review): handling of negative inputs is not visible from this
   * header — confirm against the implementation.
   */

  /** @brief float32_t variant. */
  void csky_vdsp2_vsqrt_f32(
  float32_t * pSrc,
  float32_t * pDst,
  uint32_t numSamples);

  /** @brief q31_t variant. */
  void csky_vdsp2_vsqrt_q31(
  q31_t * pSrc,
  q31_t * pDst,
  uint32_t numSamples);

  /** @brief q15_t variant. */
  void csky_vdsp2_vsqrt_q15(
  q15_t * pSrc,
  q15_t * pDst,
  uint32_t numSamples);

  /** @brief q7_t variant. */
  void csky_vdsp2_vsqrt_q7(
  q7_t * pSrc,
  q7_t * pDst,
  uint32_t numSamples);

  /*
   * Conversion prototypes with a q7_t source buffer.  Each takes the source
   * buffer, a destination buffer of the target type and an element count.
   *
   * NOTE(review): the scaling applied during conversion is not visible from
   * this header — confirm against the implementation.
   */

  /** @brief q7_t to q31_t. */
  void csky_vdsp2_q7_to_q31(
  q7_t * pSrc,
  q31_t * pDst,
  uint32_t blockSize);

  /** @brief q7_t to q15_t. */
  void csky_vdsp2_q7_to_q15(
  q7_t * pSrc,
  q15_t * pDst,
  uint32_t blockSize);

  /** @brief q7_t to float32_t. */
  void csky_vdsp2_q7_to_float(
  q7_t * pSrc,
  float32_t * pDst,
  uint32_t blockSize);

  /**
   * @brief Conversion prototype: q31_t source buffer to float32_t destination.
   *
   * NOTE(review): the scaling applied during conversion is not visible from
   * this header — confirm against the implementation.
   *
   * @param pSrc       q31_t input buffer.
   * @param pDst       float32_t output buffer.
   * @param blockSize  presumably the number of elements to convert.
   */
  void csky_vdsp2_q31_to_float(
  q31_t * pSrc,
  float32_t * pDst,
  uint32_t blockSize);

  /*
   * Scalar square-root prototypes.  Each takes the input by value, writes the
   * result through pOut and returns a csky_vdsp2_status.
   *
   * NOTE(review): which status values are returned (and for which inputs,
   * e.g. negative ones) is not visible from this header — confirm.
   */

  /** @brief float32_t variant. */
  csky_vdsp2_status csky_vdsp2_sqrt_f32(
  float32_t in,
  float32_t * pOut);

  /** @brief q31_t variant. */
  csky_vdsp2_status csky_vdsp2_sqrt_q31(
  q31_t in,
  q31_t * pOut);

  /** @brief q15_t variant. */
  csky_vdsp2_status csky_vdsp2_sqrt_q15(
  q15_t in,
  q15_t * pOut);

  /**
   * @brief Power prototype for a q31_t buffer; the single result is q63_t.
   *
   * NOTE(review): presumably the sum of squares of the input elements; the
   * result's fixed-point format is not stated in this header — confirm.
   *
   * @param pSrc       q31_t input buffer.
   * @param blockSize  presumably the number of input elements.
   * @param pResult    receives the q63_t result.
   */
  void csky_vdsp2_power_q31(
  q31_t * pSrc,
  uint32_t blockSize,
  q63_t * pResult);

  /**
   * @brief Power prototype for a plain int32_t buffer; the single result is
   *        q63_t.
   *
   * NOTE(review): presumably the sum of squares of the input elements,
   * treated as integers rather than as q31_t fractions — confirm against the
   * implementation.
   *
   * @param pSrc       int32_t input buffer.
   * @param blockSize  presumably the number of input elements.
   * @param pResult    receives the q63_t result.
   */
  void csky_vdsp2_power_int32(
  int32_t * pSrc,
  uint32_t blockSize,
  q63_t * pResult);

  /*
   * Power prototypes for float32_t, q15_t and q7_t buffers.  Each takes a
   * source buffer and an element count and writes one result through pResult.
   * The result type differs per variant: float32_t for f32, q63_t for q15,
   * q31_t for q7.
   *
   * NOTE(review): presumably the sum of squares of the input elements; the
   * fixed-point format of the results is not stated in this header — confirm.
   */

  /** @brief float32_t variant; float32_t result. */
  void csky_vdsp2_power_f32(
  float32_t * pSrc,
  uint32_t blockSize,
  float32_t * pResult);

  /** @brief q15_t variant; q63_t result. */
  void csky_vdsp2_power_q15(
  q15_t * pSrc,
  uint32_t blockSize,
  q63_t * pResult);

  /** @brief q7_t variant; q31_t result. */
  void csky_vdsp2_power_q7(
  q7_t * pSrc,
  uint32_t blockSize,
  q31_t * pResult);

  /*
   * Mean prototypes.  Each takes a source buffer and an element count and
   * writes one result of the same type as the input through pResult.
   *
   * NOTE(review): rounding/saturation behaviour of the fixed-point variants
   * and the behaviour for blockSize == 0 are not visible from this header —
   * confirm.
   */

  /** @brief q7_t variant. */
  void csky_vdsp2_mean_q7(
  q7_t * pSrc,
  uint32_t blockSize,
  q7_t * pResult);

  /** @brief q15_t variant. */
  void csky_vdsp2_mean_q15(
  q15_t * pSrc,
  uint32_t blockSize,
  q15_t * pResult);

  /** @brief q31_t variant. */
  void csky_vdsp2_mean_q31(
  q31_t * pSrc,
  uint32_t blockSize,
  q31_t * pResult);

  /** @brief float32_t variant. */
  void csky_vdsp2_mean_f32(
  float32_t * pSrc,
  uint32_t blockSize,
  float32_t * pResult);

  /*
   * Variance prototypes.  Each takes a source buffer and an element count and
   * writes one result of the same type as the input through pResult.
   *
   * NOTE(review): whether the divisor is blockSize or blockSize - 1, and the
   * behaviour for blockSize <= 1, are not visible from this header — confirm.
   */

  /** @brief float32_t variant. */
  void csky_vdsp2_var_f32(
  float32_t * pSrc,
  uint32_t blockSize,
  float32_t * pResult);

  /** @brief q31_t variant. */
  void csky_vdsp2_var_q31(
  q31_t * pSrc,
  uint32_t blockSize,
  q31_t * pResult);

  /** @brief q15_t variant. */
  void csky_vdsp2_var_q15(
  q15_t * pSrc,
  uint32_t blockSize,
  q15_t * pResult);

  /*
   * Root-mean-square prototypes.  Each takes a source buffer and an element
   * count and writes one result of the same type as the input through
   * pResult.
   *
   * NOTE(review): saturation behaviour of the fixed-point variants and the
   * behaviour for blockSize == 0 are not visible from this header — confirm.
   */

  /** @brief float32_t variant. */
  void csky_vdsp2_rms_f32(
  float32_t * pSrc,
  uint32_t blockSize,
  float32_t * pResult);

  /** @brief q31_t variant. */
  void csky_vdsp2_rms_q31(
  q31_t * pSrc,
  uint32_t blockSize,
  q31_t * pResult);

  /** @brief q15_t variant. */
  void csky_vdsp2_rms_q15(
  q15_t * pSrc,
  uint32_t blockSize,
  q15_t * pResult);

  /*
   * Standard-deviation prototypes.  Each takes a source buffer and an element
   * count and writes one result of the same type as the input through
   * pResult.
   *
   * NOTE(review): whether the divisor is blockSize or blockSize - 1, and the
   * behaviour for blockSize <= 1, are not visible from this header — confirm.
   */

  /** @brief float32_t variant. */
  void csky_vdsp2_std_f32(
  float32_t * pSrc,
  uint32_t blockSize,
  float32_t * pResult);

  /** @brief q31_t variant. */
  void csky_vdsp2_std_q31(
  q31_t * pSrc,
  uint32_t blockSize,
  q31_t * pResult);

  /** @brief q15_t variant. */
  void csky_vdsp2_std_q15(
  q15_t * pSrc,
  uint32_t blockSize,
  q15_t * pResult);

  /*
   * Complex magnitude prototypes: one source buffer, one destination buffer
   * of the same element type, and a sample count.
   *
   * NOTE(review): presumably pSrc holds interleaved real/imaginary pairs and
   * pDst receives one real value per complex sample — not stated in this
   * header, confirm.  Output scaling of the fixed-point variants is likewise
   * not visible here.
   */

  /** @brief float32_t variant. */
  void csky_vdsp2_cmplx_mag_f32(
  float32_t * pSrc,
  float32_t * pDst,
  uint32_t numSamples);

  /** @brief q31_t variant. */
  void csky_vdsp2_cmplx_mag_q31(
  q31_t * pSrc,
  q31_t * pDst,
  uint32_t numSamples);

  /** @brief q15_t variant. */
  void csky_vdsp2_cmplx_mag_q15(
  q15_t * pSrc,
  q15_t * pDst,
  uint32_t numSamples);

  /*
   * Complex dot-product prototypes.  Each takes two source buffers and a
   * sample count and writes two scalar results (real and imaginary part)
   * through separate pointers.  The fixed-point variants return results in a
   * wider type than their inputs: q31_t for q15 inputs, q63_t for q31 inputs.
   *
   * NOTE(review): presumably the inputs are interleaved real/imaginary pairs
   * and numSamples counts complex samples; the fixed-point format of the
   * results is not stated in this header — confirm.
   */

  /** @brief q15_t inputs; q31_t results. */
  void csky_vdsp2_cmplx_dot_prod_q15(
  q15_t * pSrcA,
  q15_t * pSrcB,
  uint32_t numSamples,
  q31_t * realResult,
  q31_t * imagResult);

  /** @brief q31_t inputs; q63_t results. */
  void csky_vdsp2_cmplx_dot_prod_q31(
  q31_t * pSrcA,
  q31_t * pSrcB,
  uint32_t numSamples,
  q63_t * realResult,
  q63_t * imagResult);

  /** @brief float32_t inputs; float32_t results. */
  void csky_vdsp2_cmplx_dot_prod_f32(
  float32_t * pSrcA,
  float32_t * pSrcB,
  uint32_t numSamples,
  float32_t * realResult,
  float32_t * imagResult);

  /*
   * Complex-by-real multiplication prototypes.  Each takes a complex source
   * buffer (pSrcCmplx), a real source buffer (pSrcReal), a complex
   * destination buffer (pCmplxDst) and a sample count; all three buffers
   * share one element type.
   *
   * NOTE(review): presumably the complex buffers are interleaved
   * real/imaginary pairs (2 * numSamples elements) while pSrcReal holds
   * numSamples elements — not stated in this header, confirm.
   */

  /** @brief q15_t variant. */
  void csky_vdsp2_cmplx_mult_real_q15(
  q15_t * pSrcCmplx,
  q15_t * pSrcReal,
  q15_t * pCmplxDst,
  uint32_t numSamples);

  /** @brief q31_t variant. */
  void csky_vdsp2_cmplx_mult_real_q31(
  q31_t * pSrcCmplx,
  q31_t * pSrcReal,
  q31_t * pCmplxDst,
  uint32_t numSamples);

  /** @brief float32_t variant. */
  void csky_vdsp2_cmplx_mult_real_f32(
  float32_t * pSrcCmplx,
  float32_t * pSrcReal,
  float32_t * pCmplxDst,
  uint32_t numSamples);

  /*
   * Minimum prototypes.  Each takes a source buffer and an element count and
   * writes two outputs: a value of the input's type and a uint32_t index.
   *
   * NOTE(review): presumably the outputs are the smallest element and its
   * position; which index is reported when the minimum occurs more than once,
   * and the behaviour for blockSize == 0, are not visible here — confirm.
   */

  /**
   * @brief q7_t variant.
   *
   * The output parameters are named result/index here, whereas every other
   * min/max prototype in this header calls them pResult/pIndex; the types and
   * order are the same.
   */
  void csky_vdsp2_min_q7(
  q7_t * pSrc,
  uint32_t blockSize,
  q7_t * result,
  uint32_t * index);

  /** @brief q15_t variant. */
  void csky_vdsp2_min_q15(
  q15_t * pSrc,
  uint32_t blockSize,
  q15_t * pResult,
  uint32_t * pIndex);

  /** @brief q31_t variant. */
  void csky_vdsp2_min_q31(
  q31_t * pSrc,
  uint32_t blockSize,
  q31_t * pResult,
  uint32_t * pIndex);

  /** @brief float32_t variant. */
  void csky_vdsp2_min_f32(
  float32_t * pSrc,
  uint32_t blockSize,
  float32_t * pResult,
  uint32_t * pIndex);

  /*
   * Maximum prototypes.  Each takes a source buffer and an element count and
   * writes two outputs: a value of the input's type (pResult) and a uint32_t
   * index (pIndex).
   *
   * NOTE(review): presumably the outputs are the largest element and its
   * position; which index is reported when the maximum occurs more than once,
   * and the behaviour for blockSize == 0, are not visible here — confirm.
   */

  /** @brief q7_t variant. */
  void csky_vdsp2_max_q7(
  q7_t * pSrc,
  uint32_t blockSize,
  q7_t * pResult,
  uint32_t * pIndex);

  /** @brief q15_t variant. */
  void csky_vdsp2_max_q15(
  q15_t * pSrc,
  uint32_t blockSize,
  q15_t * pResult,
  uint32_t * pIndex);

  /** @brief q31_t variant. */
  void csky_vdsp2_max_q31(
  q31_t * pSrc,
  uint32_t blockSize,
  q31_t * pResult,
  uint32_t * pIndex);

  /** @brief float32_t variant. */
  void csky_vdsp2_max_f32(
  float32_t * pSrc,
  uint32_t blockSize,
  float32_t * pResult,
  uint32_t * pIndex);

  /*
   * Complex-by-complex multiplication prototypes.  Each takes two source
   * buffers, one destination buffer of the same element type, and a sample
   * count.
   *
   * NOTE(review): presumably all three buffers hold interleaved
   * real/imaginary pairs and numSamples counts complex samples; the output
   * scaling of the fixed-point variants is not stated in this header —
   * confirm.
   */

  /** @brief q15_t variant. */
  void csky_vdsp2_cmplx_mult_cmplx_q15(
  q15_t * pSrcA,
  q15_t * pSrcB,
  q15_t * pDst,
  uint32_t numSamples);

  /** @brief q31_t variant. */
  void csky_vdsp2_cmplx_mult_cmplx_q31(
  q31_t * pSrcA,
  q31_t * pSrcB,
  q31_t * pDst,
  uint32_t numSamples);

  /** @brief float32_t variant. */
  void csky_vdsp2_cmplx_mult_cmplx_f32(
  float32_t * pSrcA,
  float32_t * pSrcB,
  float32_t * pDst,
  uint32_t numSamples);

  /*
   * "_re" complex-by-complex multiplication prototypes.  The signatures are
   * identical to the csky_vdsp2_cmplx_mult_cmplx_* family: two source
   * buffers, one destination buffer of the same element type, and a sample
   * count.
   *
   * NOTE(review): what the "_re" suffix changes relative to the plain
   * csky_vdsp2_cmplx_mult_cmplx_* functions (e.g. only the real part being
   * produced, or a different data layout) cannot be determined from this
   * header — confirm against the implementation before sizing pDst.
   */

  /** @brief q15_t variant. */
  void csky_vdsp2_cmplx_mult_cmplx_re_q15(
  q15_t * pSrcA,
  q15_t * pSrcB,
  q15_t * pDst,
  uint32_t numSamples);

  /** @brief q31_t variant. */
  void csky_vdsp2_cmplx_mult_cmplx_re_q31(
  q31_t * pSrcA,
  q31_t * pSrcB,
  q31_t * pDst,
  uint32_t numSamples);

  /** @brief float32_t variant. */
  void csky_vdsp2_cmplx_mult_cmplx_re_f32(
  float32_t * pSrcA,
  float32_t * pSrcB,
  float32_t * pDst,
  uint32_t numSamples);


  /*
   * Conversion prototypes with a float32_t source buffer.  Each takes the
   * source buffer, a destination buffer of the target type and an element
   * count.
   *
   * NOTE(review): the scaling, rounding and saturation applied to
   * out-of-range inputs are not visible from this header — confirm.
   */

  /** @brief float32_t to q31_t. */
  void csky_vdsp2_float_to_q31(
  float32_t * pSrc,
  q31_t * pDst,
  uint32_t blockSize);

  /** @brief float32_t to q15_t. */
  void csky_vdsp2_float_to_q15(
  float32_t * pSrc,
  q15_t * pDst,
  uint32_t blockSize);

  /** @brief float32_t to q7_t. */
  void csky_vdsp2_float_to_q7(
  float32_t * pSrc,
  q7_t * pDst,
  uint32_t blockSize);

  /*
   * Narrowing conversion prototypes with a q31_t source buffer.  Each takes
   * the source buffer, a destination buffer of the narrower type and an
   * element count.
   *
   * NOTE(review): whether the low bits are truncated or rounded is not
   * visible from this header — confirm.
   */

  /** @brief q31_t to q15_t. */
  void csky_vdsp2_q31_to_q15(
  q31_t * pSrc,
  q15_t * pDst,
  uint32_t blockSize);

  /** @brief q31_t to q7_t. */
  void csky_vdsp2_q31_to_q7(
  q31_t * pSrc,
  q7_t * pDst,
  uint32_t blockSize);

  /*
   * "_rs" narrowing conversion prototypes.  In addition to the source buffer,
   * destination buffer and element count they take a caller-chosen
   * shiftValue, placed before blockSize in the argument list.
   *
   * NOTE(review): presumably "rs" stands for a right shift applied to each
   * element before narrowing; the valid range of shiftValue and whether the
   * result is rounded or saturated are not visible from this header —
   * confirm.
   */

  /** @brief q31_t to q7_t with shiftValue. */
  void csky_vdsp2_q31_to_q7_rs(
  q31_t * pSrc,
  q7_t * pDst,
  uint32_t shiftValue,
  uint32_t blockSize);

  /** @brief q63_t to q31_t with shiftValue. */
  void csky_vdsp2_q63_to_q31_rs(
  q63_t * pSrc,
  q31_t * pDst,
  uint32_t shiftValue,
  uint32_t blockSize);

  /*
   * Conversion prototypes with a q15_t source buffer.  Each takes the source
   * buffer, a destination buffer of the target type and an element count.
   *
   * NOTE(review): the scaling applied during conversion (and truncation vs.
   * rounding for the q7_t target) is not visible from this header — confirm.
   */

  /** @brief q15_t to float32_t. */
  void csky_vdsp2_q15_to_float(
  q15_t * pSrc,
  float32_t * pDst,
  uint32_t blockSize);

  /** @brief q15_t to q31_t. */
  void csky_vdsp2_q15_to_q31(
  q15_t * pSrc,
  q31_t * pDst,
  uint32_t blockSize);

  /** @brief q15_t to q7_t. */
  void csky_vdsp2_q15_to_q7(
  q15_t * pSrc,
  q7_t * pDst,
  uint32_t blockSize);

#ifdef   __cplusplus
}
#endif
#endif /* _CSKY_VDSP2_MATH_H */

/**
 *
 * End of file.
 */
